Merge branch 'master' of github.com:samuelclay/NewsBlur

This commit is contained in:
Samuel Clay 2010-08-30 22:58:45 -04:00
commit 1454ba1063
23 changed files with 588 additions and 200 deletions

View file

@ -56,9 +56,13 @@ class OPMLImporter(Importer):
if not hasattr(feed, 'title'):
setattr(feed, 'title', feed.htmlUrl)
feed_address = urlnorm.normalize(feed.xmlUrl)
if len(feed_address) > 255:
continue
feed_link = urlnorm.normalize(feed.htmlUrl)
if len(feed_address) > Feed._meta.get_field('feed_address').max_length:
continue
if len(feed_link) > Feed._meta.get_field('feed_link').max_length:
continue
if len(feed.title) > Feed._meta.get_field('feed_title').max_length:
feed.title = feed.title[:255]
logging.info(' ---> \t%s - %s - %s' % (feed.title, feed_link, feed_address,))
feed_data = dict(feed_address=feed_address, feed_link=feed_link, feed_title=feed.title)
# feeds.append(feed_data)
@ -123,7 +127,7 @@ class GoogleReaderImporter(Importer):
feed_link = urlnorm.normalize(feed_link)
feed_address = urlnorm.normalize(feed_address)
if len(feed_address) > 255:
if len(feed_address) > Feed._meta.get_field('feed_address').max_length:
return folders
# See if it exists as a duplicate first

View file

@ -1,5 +1,6 @@
import datetime
import random
import zlib
from django.shortcuts import render_to_response, get_object_or_404
from django.contrib.auth.decorators import login_required
from django.template import RequestContext
@ -9,15 +10,16 @@ from django.db.models import Q
from django.core.urlresolvers import reverse
from django.contrib.auth import login as login_user
from django.contrib.auth.models import User
from django.http import HttpResponse, HttpResponseRedirect, HttpResponseForbidden
from django.http import HttpResponse, HttpResponseRedirect, HttpResponseForbidden, Http404
from django.conf import settings
from mongoengine.queryset import OperationError
from apps.analyzer.models import MClassifierTitle, MClassifierAuthor, MClassifierFeed, MClassifierTag
from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds, apply_classifier_authors, apply_classifier_tags
from apps.analyzer.models import get_classifiers_for_user
from apps.reader.models import UserSubscription, UserSubscriptionFolders, MUserStory, Feature
from apps.reader.forms import SignupForm, LoginForm, FeatureForm
try:
from apps.rss_feeds.models import Feed, FeedPage, DuplicateFeed, MStory
from apps.rss_feeds.models import Feed, MFeedPage, DuplicateFeed, MStory
except:
pass
from utils import json, urlnorm
@ -115,8 +117,9 @@ def load_feeds(request):
if not sub.feed.fetched_once:
not_yet_fetched = True
feeds[sub.feed.pk]['not_yet_fetched'] = True
if sub.feed.has_exception:
if sub.feed.has_page_exception or sub.feed.has_feed_exception:
feeds[sub.feed.pk]['has_exception'] = True
feeds[sub.feed.pk]['exception_type'] = 'feed' if sub.feed.has_feed_exception else 'page'
feeds[sub.feed.pk]['feed_address'] = sub.feed.feed_address
feeds[sub.feed.pk]['exception_code'] = sub.feed.exception_code
@ -195,8 +198,9 @@ def refresh_feeds(request):
'nt': sub.unread_count_neutral,
'ng': sub.unread_count_negative,
}
if sub.feed.has_exception:
if sub.feed.has_feed_exception or sub.feed.has_page_exception:
feeds[sub.feed.pk]['has_exception'] = True
feeds[sub.feed.pk]['exception_type'] = 'feed' if sub.feed.has_feed_exception else 'page'
feeds[sub.feed.pk]['feed_address'] = sub.feed.feed_address
feeds[sub.feed.pk]['exception_code'] = sub.feed.exception_code
if request.POST.get('check_fetch_status', False):
@ -279,12 +283,15 @@ def load_single_feed(request):
return data
def load_feed_page(request):
feed = get_object_or_404(Feed, id=request.REQUEST.get('feed_id'))
feed_page, created = FeedPage.objects.get_or_create(feed=feed)
feed_id = int(request.GET.get('feed_id', 0))
if feed_id == 0:
raise Http404
feed_page, created = MFeedPage.objects.get_or_create(feed_id=feed_id)
data = None
if not created:
data = feed.feed_page.page_data
data = feed_page.page_data and zlib.decompress(feed_page.page_data)
if created:
data = "Fetching feed..."
@ -341,7 +348,7 @@ def mark_story_as_read(request):
try:
m.save()
data.update({'code': 1})
except IntegrityError:
except OperationError:
data.update({'code': -1})
return data
@ -391,8 +398,7 @@ def add_url(request):
if duplicate_feed:
feed = [duplicate_feed[0].feed]
else:
feed = Feed.objects.filter(Q(feed_address=url)
| Q(feed_link__icontains=url))
feed = Feed.objects.filter(feed_address=url)
if feed:
feed = feed[0]
@ -406,7 +412,7 @@ def add_url(request):
if not feed:
code = -1
message = "That URL does not point to a website or RSS feed."
message = "That URL does not point to an RSS feed or a website that has an RSS feed."
else:
us, _ = UserSubscription.objects.get_or_create(
feed=feed,

View file

@ -7,7 +7,7 @@ import multiprocessing
import traceback
import feedparser
from utils import log as logging
from apps.rss_feeds.models import FeedPage
from apps.rss_feeds.models import MFeedPage
class PageImporter(object):
@ -81,6 +81,6 @@ class PageImporter(object):
def save_page(self, html):
if html and len(html) > 100:
feed_page, _ = FeedPage.objects.get_or_create(feed=self.feed)
feed_page, _ = MFeedPage.objects.get_or_create(feed_id=self.feed.pk)
feed_page.page_data = html
feed_page.save()

View file

@ -39,6 +39,7 @@ class Command(BaseCommand):
for feed in feeds:
feed.set_next_scheduled_update()
print '.',
return
socket.setdefaulttimeout(options['timeout'])
feeds = Feed.objects.filter(next_scheduled_update__lte=now).order_by('?')

View file

@ -0,0 +1,132 @@
# encoding: utf-8
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
# South schema migration for the rss_feeds app: adds two boolean columns,
# 'has_feed_exception' and 'has_page_exception', to the 'feeds' table.
class Migration(SchemaMigration):
# Apply the migration: add both columns as BooleanField(default=False).
# keep_default=False is passed through to db.add_column for each column.
def forwards(self, orm):
# Adding field 'Feed.has_feed_exception'
db.add_column('feeds', 'has_feed_exception', self.gf('django.db.models.fields.BooleanField')(default=False), keep_default=False)
# Adding field 'Feed.has_page_exception'
db.add_column('feeds', 'has_page_exception', self.gf('django.db.models.fields.BooleanField')(default=False), keep_default=False)
# Revert the migration: drop the two columns added by forwards().
def backwards(self, orm):
# Deleting field 'Feed.has_feed_exception'
db.delete_column('feeds', 'has_feed_exception')
# Deleting field 'Feed.has_page_exception'
db.delete_column('feeds', 'has_page_exception')
# Snapshot of the rss_feeds model definitions at this migration, keyed by
# 'app_label.modelname'. Each field maps to (field class path, positional
# args, keyword args), with every value stored as a string.
# The 'rss_feeds.feed' entry already lists the two new columns alongside the
# older 'has_exception' column.
models = {
'rss_feeds.duplicatefeed': {
'Meta': {'object_name': 'DuplicateFeed'},
'duplicate_address': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'}),
'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
},
'rss_feeds.feed': {
'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"},
'active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
'etag': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'blank': 'True'}),
'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'feed_address': ('django.db.models.fields.URLField', [], {'unique': 'True', 'max_length': '255'}),
'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
'feed_tagline': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '1024', 'null': 'True', 'blank': 'True'}),
'feed_title': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
'has_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
'last_update': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '15'}),
'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
},
'rss_feeds.feedfetchhistory': {
'Meta': {'object_name': 'FeedFetchHistory'},
'exception': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'feed_fetch_history'", 'to': "orm['rss_feeds.Feed']"}),
'fetch_date': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'message': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
'status_code': ('django.db.models.fields.CharField', [], {'max_length': '10', 'null': 'True', 'blank': 'True'})
},
'rss_feeds.feedpage': {
'Meta': {'object_name': 'FeedPage'},
'feed': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'feed_page'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'page_data': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'})
},
'rss_feeds.feedupdatehistory': {
'Meta': {'object_name': 'FeedUpdateHistory'},
'average_per_feed': ('django.db.models.fields.DecimalField', [], {'max_digits': '4', 'decimal_places': '1'}),
'fetch_date': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'number_of_feeds': ('django.db.models.fields.IntegerField', [], {}),
'seconds_taken': ('django.db.models.fields.IntegerField', [], {})
},
'rss_feeds.feedxml': {
'Meta': {'object_name': 'FeedXML'},
'feed': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'feed_xml'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'rss_xml': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'})
},
'rss_feeds.pagefetchhistory': {
'Meta': {'object_name': 'PageFetchHistory'},
'exception': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'page_fetch_history'", 'to': "orm['rss_feeds.Feed']"}),
'fetch_date': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'message': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
'status_code': ('django.db.models.fields.CharField', [], {'max_length': '10', 'null': 'True', 'blank': 'True'})
},
'rss_feeds.story': {
'Meta': {'ordering': "['-story_date']", 'unique_together': "(('story_feed', 'story_guid_hash'),)", 'object_name': 'Story', 'db_table': "'stories'"},
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'story_author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.StoryAuthor']"}),
'story_author_name': ('django.db.models.fields.CharField', [], {'max_length': '500', 'null': 'True', 'blank': 'True'}),
'story_content': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'}),
'story_content_type': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
'story_date': ('django.db.models.fields.DateTimeField', [], {}),
'story_feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'stories'", 'to': "orm['rss_feeds.Feed']"}),
'story_guid': ('django.db.models.fields.CharField', [], {'max_length': '1000'}),
'story_guid_hash': ('django.db.models.fields.CharField', [], {'max_length': '40'}),
'story_original_content': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'}),
'story_past_trim_date': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
'story_permalink': ('django.db.models.fields.CharField', [], {'max_length': '1000'}),
'story_tags': ('django.db.models.fields.CharField', [], {'max_length': '2000', 'null': 'True', 'blank': 'True'}),
'story_title': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['rss_feeds.Tag']", 'symmetrical': 'False'})
},
'rss_feeds.storyauthor': {
'Meta': {'object_name': 'StoryAuthor'},
'author_name': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
},
'rss_feeds.tag': {
'Meta': {'object_name': 'Tag'},
'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'name': ('django.db.models.fields.CharField', [], {'max_length': '255'})
}
}
# The single app whose models appear in the snapshot above.
complete_apps = ['rss_feeds']

View file

@ -7,6 +7,7 @@ import random
import re
import mongoengine as mongo
import pymongo
import zlib
from collections import defaultdict
from operator import itemgetter
from BeautifulSoup import BeautifulStoneSoup
@ -35,7 +36,9 @@ class Feed(models.Model):
num_subscribers = models.IntegerField(default=0)
last_update = models.DateTimeField(auto_now=True)
fetched_once = models.BooleanField(default=False)
has_exception = models.BooleanField(default=False)
has_exception = models.BooleanField(default=False) # TODO: Remove in lieu of below 2 columns
has_feed_exception = models.BooleanField(default=False)
has_page_exception = models.BooleanField(default=False)
exception_code = models.IntegerField(default=0)
min_to_decay = models.IntegerField(default=15)
days_to_trim = models.IntegerField(default=90)
@ -89,12 +92,12 @@ class Feed(models.Model):
try:
self.feed_address = feed_address
self.next_scheduled_update = datetime.datetime.now()
self.has_exception = False
self.has_feed_exception = False
self.active = True
self.save()
except:
original_feed = Feed.objects.get(feed_address=feed_address)
original_feed.has_exception = False
original_feed.has_feed_exception = False
original_feed.active = True
original_feed.save()
merge_feeds(original_feed.pk, self.pk)
@ -102,45 +105,52 @@ class Feed(models.Model):
return not not feed_address
def save_feed_history(self, status_code, message, exception=None):
FeedFetchHistory.objects.create(feed=self,
status_code=status_code,
message=message,
exception=exception)
old_fetch_histories = self.feed_fetch_history.all().order_by('-fetch_date')[10:]
MFeedFetchHistory(feed_id=self.pk,
status_code=int(status_code),
message=message,
exception=exception,
fetch_date=datetime.datetime.now()).save()
old_fetch_histories = MFeedFetchHistory.objects(feed_id=self.pk).order_by('-fetch_date')[10:]
for history in old_fetch_histories:
history.delete()
if status_code >= 400:
fetch_history = self.feed_fetch_history.all().values('status_code')
self.count_errors_in_history(fetch_history, status_code)
elif self.has_exception:
self.has_exception = False
fetch_history = map(lambda h: h.status_code,
MFeedFetchHistory.objects(feed_id=self.pk))
self.count_errors_in_history(fetch_history, status_code, 'feed')
elif self.has_feed_exception:
self.has_feed_exception = False
self.active = True
self.save()
def save_page_history(self, status_code, message, exception=None):
PageFetchHistory.objects.create(feed=self,
status_code=status_code,
message=message,
exception=exception)
old_fetch_histories = self.page_fetch_history.all()[10:]
MPageFetchHistory(feed_id=self.pk,
status_code=int(status_code),
message=message,
exception=exception,
fetch_date=datetime.datetime.now()).save()
old_fetch_histories = MPageFetchHistory.objects(feed_id=self.pk).order_by('-fetch_date')[10:]
for history in old_fetch_histories:
history.delete()
if status_code >= 400:
fetch_history = self.page_fetch_history.all().values('status_code')
self.count_errors_in_history(fetch_history, status_code)
elif self.has_exception:
self.has_exception = False
fetch_history = map(lambda h: h.status_code,
MPageFetchHistory.objects(feed_id=self.pk))
self.count_errors_in_history(fetch_history, status_code, 'page')
elif self.has_page_exception:
self.has_page_exception = False
self.active = True
self.save()
def count_errors_in_history(self, fetch_history, status_code):
non_errors = [h for h in fetch_history if int(h['status_code']) < 400]
errors = [h for h in fetch_history if int(h['status_code']) >= 400]
def count_errors_in_history(self, fetch_history, status_code, exception_type):
non_errors = [h for h in fetch_history if int(h) < 400]
errors = [h for h in fetch_history if int(h) >= 400]
if len(non_errors) == 0 and len(errors) >= 1:
self.has_exception = True
if exception_type == 'feed':
self.has_feed_exception = True
elif exception_type == 'page':
self.has_page_exception = True
self.active = False
self.exception_code = status_code
self.save()
@ -165,7 +175,7 @@ class Feed(models.Model):
def count_stories(self, verbose=False, lock=None):
self.save_feed_stories_last_month(verbose, lock)
self.save_feed_story_history(lock)
# self.save_feed_story_history_statistics(lock)
def save_feed_stories_last_month(self, verbose=False, lock=None):
month_ago = datetime.datetime.now() - datetime.timedelta(days=30)
@ -173,52 +183,60 @@ class Feed(models.Model):
story_date__gte=month_ago).count()
self.stories_last_month = stories_last_month
# self.save_feed_story_history(lock)
self.save(lock=lock)
if verbose:
print " ---> %s [%s]: %s stories last month" % (self.feed_title, self.pk,
self.stories_last_month)
def save_feed_story_history(self, lock=None):
def save_feed_story_history_statistics(self, lock=None, current_counts=None):
"""
Fills in missing months between earlier occurrences and now.
Save format: [('YYYY-MM', #), ...]
Example output: [(2010-12, 123), (2011-01, 146)]
"""
d = defaultdict(int)
now = datetime.datetime.now()
min_year = now.year
total = 0
month_count = 0
current_counts = self.story_count_history and json.decode(self.story_count_history)
if not current_counts:
current_counts = self.story_count_history and json.decode(self.story_count_history)
if not current_counts:
current_counts = []
# Count stories, aggregate by year and month
stories = Story.objects.filter(story_feed=self).extra(select={
'year': "EXTRACT(year FROM story_date)",
'month': "EXTRACT(month from story_date)"
}).values('year', 'month')
for story in stories:
year = int(story['year'])
d['%s-%s' % (year, int(story['month']))] += 1
if year < min_year:
min_year = year
# Count stories, aggregate by year and month. Map Reduce!
map_f = """
function() {
var date = (this.story_date.getFullYear()) + "-" + (this.story_date.getMonth()+1);
emit(date, 1);
}
"""
reduce_f = """
function(key, values) {
var total = 0;
for (var i=0; i < values.length; i++) {
total += values[i];
}
return total;
}
"""
dates = {}
res = MStory.objects(story_feed_id=self.pk).map_reduce(map_f, reduce_f)
for r in res:
dates[r.key] = r.value
# Add on to existing months, always amending up, never down. (Current month
# is guaranteed to be accurate, since trim_feeds won't delete it until after
# a month. Hacker News can have 1,000+ and still be counted.)
for current_month, current_count in current_counts:
if current_month not in d or d[current_month] < current_count:
d[current_month] = current_count
year = re.findall(r"(\d{4})-\d{1,2}", current_month)[0]
if current_month not in dates or dates[current_month] < current_count:
dates[current_month] = current_count
year = int(re.findall(r"(\d{4})-\d{1,2}", current_month)[0])
if year < min_year:
min_year = year
# Assemble a list with 0's filled in for missing months,
# trimming left and right 0's.
months = []
@ -226,11 +244,11 @@ class Feed(models.Model):
for year in range(min_year, now.year+1):
for month in range(1, 12+1):
if datetime.datetime(year, month, 1) < now:
key = '%s-%s' % (year, month)
if d.get(key) or start:
key = u'%s-%s' % (year, month)
if dates.get(key) or start:
start = True
months.append((key, d.get(key, 0)))
total += d.get(key, 0)
months.append((key, dates.get(key, 0)))
total += dates.get(key, 0)
month_count += 1
self.story_count_history = json.encode(months)
@ -252,7 +270,7 @@ class Feed(models.Model):
def add_feed(self, feed_address, feed_link, feed_title):
print locals()
def update(self, force=False, feed=None, single_threaded=True):
def update(self, force=False, single_threaded=True):
from utils import feed_fetcher
try:
self.feed_address = self.feed_address % {'NEWSBLUR_DIR': settings.NEWSBLUR_DIR}
@ -318,10 +336,10 @@ class Feed(models.Model):
# logging.debug('- Updated story in feed (%s - %s): %s / %s' % (self.feed_title, story.get('title'), len(existing_story.story_content), len(story_content)))
original_content = None
if existing_story.get('story_original_content'):
original_content = existing_story.get('story_original_content')
if existing_story.get('story_original_content_z'):
original_content = zlib.decompress(existing_story.get('story_original_content_z'))
else:
original_content = existing_story.get('story_content')
original_content = zlib.decompress(existing_story.get('story_content_z'))
# print 'Type: %s %s' % (type(original_content), type(story_content))
if len(story_content) > 10:
diff = HTMLDiff(unicode(original_content), story_content)
@ -448,7 +466,7 @@ class Feed(models.Model):
story['story_date'] = story_db.story_date
story['story_authors'] = story_db.story_author_name
story['story_title'] = story_db.story_title
story['story_content'] = story_db.story_content
story['story_content'] = story_db.story_content_z and zlib.decompress(story_db.story_content_z)
story['story_permalink'] = story_db.story_permalink
story['story_feed_id'] = self.pk
story['id'] = story_db.id
@ -646,6 +664,20 @@ class StoryAuthor(models.Model):
class FeedPage(models.Model):
feed = models.OneToOneField(Feed, related_name="feed_page")
page_data = StoryField(null=True, blank=True)
class MFeedPage(mongo.Document):
    """MongoDB document holding the stored page body for one feed.

    Documents are kept in the ``feed_pages`` collection, with ``feed_id``
    acting as the primary key. ``page_data`` is zlib-compressed by
    ``save()``, so whatever is read back must be decompressed before use.
    """

    feed_id = mongo.IntField(primary_key=True)
    page_data = mongo.BinaryField()

    meta = dict(collection='feed_pages', allow_inheritance=False)

    def save(self, *args, **kwargs):
        """Compress a non-empty ``page_data`` in place, then persist.

        The attribute itself is overwritten with the compressed bytes, so
        saving the same instance again without reassigning ``page_data``
        compresses the already-compressed value a second time.
        """
        uncompressed = self.page_data
        if uncompressed:
            self.page_data = zlib.compress(uncompressed)
        super(MFeedPage, self).save(*args, **kwargs)
class FeedXML(models.Model):
feed = models.OneToOneField(Feed, related_name="feed_xml")
@ -682,7 +714,7 @@ class Story(models.Model):
def save(self, *args, **kwargs):
if not self.story_guid_hash and self.story_guid:
self.story_guid_hash = hashlib.md5(self.story_guid).hexdigest()
if len(self.story_title) > 255:
if len(self.story_title) > self._meta.get_field('story_title').max_length:
self.story_title = self.story_title[:255]
super(Story, self).save(*args, **kwargs)
@ -692,7 +724,9 @@ class MStory(mongo.Document):
story_date = mongo.DateTimeField()
story_title = mongo.StringField(max_length=1024)
story_content = mongo.StringField()
story_content_z = mongo.BinaryField()
story_original_content = mongo.StringField()
story_original_content_z = mongo.BinaryField()
story_content_type = mongo.StringField(max_length=255)
story_author_name = mongo.StringField()
story_permalink = mongo.StringField()
@ -706,6 +740,15 @@ class MStory(mongo.Document):
'ordering': ['-story_date'],
'allow_inheritance': False,
}
def save(self, *args, **kwargs):
    """Move story bodies into their compressed ``*_z`` fields, then persist.

    When ``story_content`` or ``story_original_content`` holds a non-empty
    value, it is zlib-compressed into ``story_content_z`` or
    ``story_original_content_z`` respectively, and the plain field is
    reset to ``None`` so only the compressed copy is stored.

    Remaining positional and keyword arguments are forwarded unchanged to
    the parent ``save()``.
    """
    def _deflate(content):
        # zlib operates on bytes. The plain fields are text (StringField),
        # and text containing non-ASCII characters cannot be converted
        # implicitly, so encode it explicitly as UTF-8 first. Byte strings
        # pass through untouched, which keeps ASCII output identical.
        if isinstance(content, type(u'')):
            content = content.encode('utf-8')
        return zlib.compress(content)

    if self.story_content:
        self.story_content_z = _deflate(self.story_content)
        self.story_content = None
    if self.story_original_content:
        self.story_original_content_z = _deflate(self.story_original_content)
        self.story_original_content = None
    super(MStory, self).save(*args, **kwargs)
class FeedUpdateHistory(models.Model):
fetch_date = models.DateTimeField(default=datetime.datetime.now)
@ -741,6 +784,18 @@ class FeedFetchHistory(models.Model):
self.exception and self.exception[:50]
)
class MFeedFetchHistory(mongo.Document):
    """MongoDB record of a single feed fetch attempt.

    Stored in the ``feed_fetch_history`` collection. Each record carries
    the feed's id, an integer status code, a message, optional exception
    text and the date of the fetch.
    """

    feed_id = mongo.IntField()
    status_code = mongo.IntField()
    message = mongo.StringField()
    exception = mongo.StringField()
    fetch_date = mongo.DateTimeField()

    meta = dict(collection='feed_fetch_history', allow_inheritance=False)
class PageFetchHistory(models.Model):
feed = models.ForeignKey(Feed, related_name='page_fetch_history')
status_code = models.CharField(max_length=10, null=True, blank=True)
@ -758,6 +813,18 @@ class PageFetchHistory(models.Model):
self.exception and self.exception[:50]
)
class MPageFetchHistory(mongo.Document):
    """MongoDB record of a single page fetch attempt.

    Stored in the ``page_fetch_history`` collection. Each record carries
    the feed's id, an integer status code, a message, optional exception
    text and the date of the fetch.
    """

    feed_id = mongo.IntField()
    status_code = mongo.IntField()
    message = mongo.StringField()
    exception = mongo.StringField()
    fetch_date = mongo.DateTimeField()

    meta = dict(collection='page_fetch_history', allow_inheritance=False)
class DuplicateFeed(models.Model):
duplicate_address = models.CharField(max_length=255, unique=True)
feed = models.ForeignKey(Feed, related_name='duplicate_addresses')

View file

@ -1,6 +1,8 @@
import datetime
from utils import log as logging
from django.shortcuts import get_object_or_404
from django.http import HttpResponseForbidden
from django.db import IntegrityError
from apps.rss_feeds.models import Feed, merge_feeds
from utils.user_functions import ajax_login_required
from utils import json, feedfinder
@ -36,12 +38,21 @@ def load_feed_statistics(request):
@json.json_view
def exception_retry(request):
feed_id = request.POST['feed_id']
reset_fetch = json.decode(request.POST['reset_fetch'])
feed = get_object_or_404(Feed, pk=feed_id)
feed.has_exception = False
feed.fetched_once = False
feed.next_scheduled_update = datetime.datetime.now()
feed.has_page_exception = False
feed.has_feed_exception = False
if reset_fetch:
logging.info(' ---> [%s] Refreshing exception feed: %s' % (request.user, feed))
feed.fetched_once = False
else:
logging.info(' ---> [%s] Forcing refreshing feed: %s' % (request.user, feed))
feed.save()
feed.update(force=True)
return {'code': 1}
@ -52,19 +63,21 @@ def exception_change_feed_address(request):
feed = get_object_or_404(Feed, pk=feed_id)
feed_address = request.POST['feed_address']
if not feed.has_exception:
if not feed.has_feed_exception and not feed.has_page_exception:
logging.info(" ***********> [%s] Incorrect feed address change: %s" % (request.user, feed))
return HttpResponseForbidden()
feed.has_exception = False
feed.has_feed_exception = False
feed.active = True
feed.fetched_once = False
feed.feed_address = feed_address
feed.next_scheduled_update = datetime.datetime.now()
try:
feed.save()
except:
except IntegrityError:
original_feed = Feed.objects.get(feed_address=feed_address)
original_feed.has_exception = False
original_feed.next_scheduled_update = datetime.datetime.now()
original_feed.has_feed_exception = False
original_feed.active = True
original_feed.save()
merge_feeds(original_feed.pk, feed.pk)
@ -79,24 +92,26 @@ def exception_change_feed_link(request):
feed_link = request.POST['feed_link']
code = -1
if not feed.has_exception:
logging.info(" ***********> [%s] Incorrect feed address change: %s" % (request.user, feed))
if not feed.has_page_exception and not feed.has_feed_exception:
logging.info(" ***********> [%s] Incorrect feed link change: %s" % (request.user, feed))
# This Forbidden-403 throws an error, which sounds pretty good to me right now
return HttpResponseForbidden()
feed_address = feedfinder.feed(feed_link)
if feed_address:
code = 1
feed.has_exception = False
feed.has_page_exception = False
feed.active = True
feed.fetched_once = False
feed.feed_link = feed_link
feed.feed_address = feed_address
feed.next_scheduled_update = datetime.datetime.now()
try:
feed.save()
except:
except IntegrityError:
original_feed = Feed.objects.get(feed_address=feed_address)
original_feed.has_exception = False
original_feed.next_scheduled_update = datetime.datetime.now()
original_feed.has_page_exception = False
original_feed.active = True
original_feed.save()
merge_feeds(original_feed.pk, feed.pk)

33
fabfile.py vendored
View file

@ -1,11 +1,15 @@
from fabric.api import env
from fabric.api import env, run, require, cd
# =========
# = Roles =
# =========
env.user = 'conesus'
env.hosts = ['www.newsblur.com', 'db01.newsblur.com']
env.roledefs ={
'web': ['www.newsblur.com']
'web': ['app01.newsblur.com'],
'ff': ['app01.newsblur.com', 'db01.newsblur.com'],
'db': ['db01.newsblur.com'],
}
# ================
@ -23,26 +27,31 @@ def git_reset():
# ================
def production():
config.fab_hosts = ['www.newsblur.com']
config.repos = (('newsblur', 'origin', 'master'),)
env.fab_hosts = ['app01.newsblur.com', 'db01.newsblur.com']
env.repos = (('newsblur', 'origin', 'master'),)
# ===================
# = Server Commands =
# ===================
# Deploy the application: change into /home/conesus/newsblur, pull the latest
# code with git, then run the ./utils/restart script.
# NOTE(review): indentation is lost in this view; both run() calls presumably
# sit inside the cd() block, since ./utils/restart is a relative path - confirm.
def deploy():
with cd('/home/conesus/newsblur'):
run('git pull')
run('./utils/restart')
# Run the restart script from the repository directory under the home directory.
# NOTE(review): "$(repo)" is not filled in by anything visible here, so it is
# passed to run() verbatim, where a shell would read it as command
# substitution. pull() and reset() now store the name in env.repo - confirm
# whether this should be built from env.repo instead.
def restart():
run("cd ~/$(repo)/; ./utils/restart;")
def pull():
require('fab_hosts', provided_by=[production])
for repo, parent, branch in config.repos:
config.repo = repo
config.parent = parent
config.branch = branch
invoke(git_pull)
for repo, parent, branch in env.repos:
env.repo = repo
env.parent = parent
env.branch = branch
git_pull()
def reset(repo, hash):
require('fab_hosts', provided_by=[production])
config.hash = hash
config.repo = repo
invoke(git_reset)
env.hash = hash
env.repo = repo
git_reset()

View file

@ -663,7 +663,7 @@ background: transparent;
#story_titles .NB-feedbar {
font-weight: bold;
font-size: 16px;
padding: 2px 28px 2px 4px;
padding: 2px 208px 2px 4px;
background: #dadada url('../theme/images/dadada_40x100_textures_03_highlight_soft_75.png') 0 50% repeat-x;
border-bottom: 2px solid #404040;
position: relative;
@ -680,17 +680,18 @@ background: transparent;
}
#story_titles .NB-feedbar .feed .feed_title {
float: left;
/* float: left;*/
display: block;
margin-left: 24px;
}
#story_titles .NB-feedbar .NB-feedbar-manage-feed {
background: transparent url("../img/icons/silk/cog.png") no-repeat center center;
float: left;
width: 16px;
height: 16px;
display: none;
cursor: pointer;
padding: 3px 4px 0 16px;
padding: 0 0 0 38px;
}
#story_titles .NB-feedbar .NB-feedbar-mark-feed-read {
background-color: #3090cf;
@ -698,6 +699,7 @@ background: transparent;
cursor: pointer;
position: absolute;
right: 95px;
top: 2px;
font-size: 9px;
line-height: 15px;
font-weight: bold;
@ -707,13 +709,12 @@ background: transparent;
display: none;
}
#story_titles .NB-feedbar .NB-feedbar-statistics {
background: transparent url('../img/icons/silk/chart_curve.png') no-repeat 0 0;
background: transparent url('../img/icons/silk/chart_curve.png') no-repeat left center;
width: 16px;
height: 16px;
float: left;
height: 16px;
display: none;
cursor: pointer;
padding: 0 4px 0 0px;
padding: 0 4px 0 24px;
margin: 3px 0 0 0;
}
#story_titles .NB-feedbar .NB-feedbar-last-updated {
@ -721,6 +722,7 @@ background: transparent;
text-shadow: 0 1px 0 #e6e6e6;
position: absolute;
right: 54px;
top: 2px;
font-size: 9px;
line-height: 15px;
font-weight: bold;
@ -730,7 +732,10 @@ background: transparent;
}
#story_titles .NB-feedbar.NB-feedbar-hover .NB-feedbar-manage-feed,
#story_titles .NB-feedbar.NB-feedbar-hover .NB-feedbar-statistics,
#story_titles .NB-feedbar.NB-feedbar-hover .NB-feedbar-statistics {
display: inline;
}
#story_titles .NB-feedbar.NB-feedbar-hover .NB-feedbar-mark-feed-read {
display: block;
}
@ -1656,7 +1661,7 @@ a.NB-splash-link:hover {
.NB-modal .NB-modal-submit .NB-modal-submit-button {
border: 1px solid #606060;
font-size: 12px;
padding: 4px 8px;
padding: 4px 12px;
text-transform: uppercase;
margin: 2px 4px 2px;
border: 1px solid #606060;
@ -1679,12 +1684,17 @@ a.NB-splash-link:hover {
color: #909090;
font-weight: bold;
}
.NB-modal .NB-modal-submit .NB-modal-submit-save {
.NB-modal .NB-modal-submit .NB-modal-submit-green {
background-color: #217412;
font-weight: bold;
color: #FCFCFC;
}
.NB-modal .NB-modal-submit .NB-modal-submit-delete {
background-color: #7E020D;
font-weight: bold;
color: #FCFCFC;
}
.NB-modal .NB-modal-submit .NB-disabled {
background-color: #d5d4dB;
color: #909090;
@ -1830,7 +1840,7 @@ a.NB-splash-link:hover {
color: #FFF;
background-color: #4679BB;
}
.NB-modal-trainer .NB-modal-submit .NB-modal-submit-save {
.NB-modal-trainer .NB-modal-submit .NB-modal-submit-green {
float: right;
padding-left: 12px !important;
padding-right: 12px !important;
@ -1953,7 +1963,7 @@ a.NB-splash-link:hover {
}
.NB-add input[type=text] {
width: 350px;
width: 340px;
font-size: 14px;
padding: 2px;
margin: 2px 4px 2px;
@ -2069,7 +2079,7 @@ a.NB-splash-link:hover {
}
.NB-manage .NB-manage-delete {
margin: 0 0 4px 12px;
margin: 6px 0 4px;
font-size: 12px;
}
@ -2729,7 +2739,7 @@ background: transparent;
color: #4A9937;
}
.NB-modal-exception .NB-modal-submit input.NB-modal-submit-save {
.NB-modal-exception .NB-modal-submit input.NB-modal-submit-green {
margin-bottom: 6px;
}

View file

@ -452,7 +452,10 @@ NEWSBLUR.AssetModel.Reader.prototype = {
save_exception_retry: function(feed_id, callback) {
var self = this;
if (NEWSBLUR.Globals.is_authenticated) {
this.make_request('/rss_feeds/exception_retry', {'feed_id': feed_id}, callback);
this.make_request('/rss_feeds/exception_retry', {
'feed_id': feed_id,
'reset_fetch': !!(this.feeds[feed_id].has_feed_exception || this.feeds[feed_id].has_page_exception)
}, callback);
} else {
if ($.isFunction(callback)) callback();
}

View file

@ -474,7 +474,7 @@
scroll = 0;
}
$feed_list.scrollTop(scroll);
this.open_feed(feed_id, $next_feed);
// this.open_feed(feed_id, $next_feed);
}
},
@ -571,6 +571,7 @@
}
if (feed.not_yet_fetched) {
NEWSBLUR.log(['Feed not fetched', feed]);
if (!this.model.preference('hide_fetch_progress')) {
this.flags['has_unfetched_feeds'] = true;
}
@ -640,9 +641,11 @@
])
]),
$.make('img', { className: 'feed_favicon', src: this.google_favicon_url + feed.feed_link }),
$.make('span', { className: 'feed_title' }, feed.feed_title),
$.make('div', { className: 'NB-feedbar-manage-feed', title: 'Manage Intelligence' }),
(type == 'story' && $.make('div', { className: 'NB-feedbar-statistics', title: 'Statistics' })),
$.make('span', { className: 'feed_title' }, [
feed.feed_title,
$.make('span', { className: 'NB-feedbar-manage-feed', title: 'Manage Intelligence' }),
(type == 'story' && $.make('span', { className: 'NB-feedbar-statistics', title: 'Statistics' }))
]),
(type == 'story' && $.make('div', { className: 'NB-feedbar-last-updated' }, [
$.make('span', { className: 'NB-feedbar-last-updated-label' }, 'Updated: '),
$.make('span', { className: 'NB-feedbar-last-updated-date' }, feed.updated)
@ -838,7 +841,8 @@
'page_view_showing_feed_view': false,
'iframe_fetching_story_locations': false,
'story_titles_loaded': false,
'iframe_prevented_from_loading': false
'iframe_prevented_from_loading': false,
'pause_feed_refreshing': false
});
$.extend(this.cache, {
@ -857,12 +861,16 @@
this.$s.$story_titles.data('feed_id', null);
},
open_feed: function(feed_id, $feed_link) {
open_feed: function(feed_id, $feed_link, force) {
var self = this;
var $story_titles = this.$s.$story_titles;
this.flags['opening_feed'] = true;
if (feed_id != this.active_feed) {
if (!$feed_link) {
$feed_link = $('.feed.selected', this.$feed_list).eq(0);
}
if (feed_id != this.active_feed || force) {
$story_titles.empty().scrollTop('0px');
this.reset_feed();
this.hide_splash_page();
@ -1793,7 +1801,7 @@
},
determine_feed_view_story_position: function($story, story) {
if ($story.is(':visible')) {
if ($story && $story.is(':visible')) {
var position_original = parseInt($story.offset().top, 10);
var position_offset = parseInt($story.offsetParent().scrollTop(), 10);
var position = position_original + position_offset;
@ -1948,6 +1956,7 @@
open_add_feed_modal: function() {
clearInterval(this.flags['bouncing_callout']);
$.modal.close();
NEWSBLUR.add_feed = new NEWSBLUR.ReaderAddFeed();
},
@ -2321,26 +2330,34 @@
setup_feed_refresh: function() {
var self = this;
var FEED_REFRESH_INTERVAL = (1000 * 60) / 2; // 1/2 minutes
var FEED_REFRESH_INTERVAL = (1000 * 60) / 12; // 5 seconds (one twelfth of a minute)
clearInterval(this.flags.feed_refresh);
this.flags.feed_refresh = setInterval(function() {
self.model.refresh_feeds($.rescope(self.post_feed_refresh, self), self.flags['has_unfetched_feeds']);
if (!self.flags['pause_feed_refreshing']) {
self.model.refresh_feeds(_.bind(function(updated_feeds) {
self.post_feed_refresh(updated_feeds);
}, self), self.flags['has_unfetched_feeds']);
}
}, FEED_REFRESH_INTERVAL);
},
force_feed_refresh: function(callback) {
force_feed_refresh: function(callback, update_all) {
if (callback) {
this.cache.refresh_callback = callback;
} else {
delete this.cache.refresh_callback;
}
this.model.refresh_feeds($.rescope(this.post_feed_refresh, this), this.flags['has_unfetched_feeds']);
this.flags['pause_feed_refreshing'] = true;
this.model.refresh_feeds(_.bind(function(updated_feeds) {
this.post_feed_refresh(updated_feeds, update_all);
}, this), this.flags['has_unfetched_feeds']);
},
post_feed_refresh: function(e, updated_feeds) {
post_feed_refresh: function(updated_feeds, update_all) {
var feeds = this.model.feeds;
if (this.cache.refresh_callback && $.isFunction(this.cache.refresh_callback)) {
@ -2354,16 +2371,14 @@
if (!feed) continue;
var $feed = this.make_feed_title_line(feed, true, 'feed');
var $feed_on_page = this.find_feed_in_feed_list(feed_id);
var selected = $feed_on_page.hasClass('selected');
$('.unread_count', $feed).corner('4px');
if (selected) {
$feed.addClass('selected');
}
if (feed_id == this.active_feed) {
NEWSBLUR.log(['UPDATING INLINE', feed.feed_title, $feed, $feed_on_page]);
var limit = $('.story', this.$s.$story_titles).length;
this.model.refresh_feed(feed_id, $.rescope(this.post_refresh_active_feed, this), limit);
// this.model.refresh_feed(feed_id, $.rescope(this.post_refresh_active_feed, this), limit);
$feed_on_page.replaceWith($feed);
this.mark_feed_as_selected(this.active_feed, $feed);
} else {
if (!this.flags['has_unfetched_feeds']) {
NEWSBLUR.log(['UPDATING', feed.feed_title, $feed, $feed_on_page]);
@ -2373,6 +2388,8 @@
}
this.check_feed_fetch_progress();
this.flags['pause_feed_refreshing'] = false;
},
post_refresh_active_feed: function(e, data, first_load) {

View file

@ -34,7 +34,7 @@ NEWSBLUR.ReaderAddFeed.prototype = {
$.make('div', { className: 'NB-loading' }),
$.make('label', { 'for': 'NB-add-url' }, 'RSS or URL: '),
$.make('input', { type: 'text', id: 'NB-add-url', className: 'NB-add-url', name: 'url' }),
$.make('input', { type: 'submit', value: 'Add it', className: 'NB-modal-submit-save NB-add-url-submit' }),
$.make('input', { type: 'submit', value: 'Add it', className: 'NB-modal-submit-green NB-add-url-submit' }),
$.make('div', { className: 'NB-error' })
])
])
@ -51,7 +51,7 @@ NEWSBLUR.ReaderAddFeed.prototype = {
$.make('div', { className: 'NB-folder-icon' })
]),
$.make('input', { type: 'text', id: 'NB-add-folder', className: 'NB-add-folder', name: 'url' }),
$.make('input', { type: 'submit', value: 'Add folder', className: 'NB-add-folder-submit NB-modal-submit-save' }),
$.make('input', { type: 'submit', value: 'Add folder', className: 'NB-add-folder-submit NB-modal-submit-green' }),
$.make('div', { className: 'NB-error' })
])
])
@ -61,7 +61,7 @@ NEWSBLUR.ReaderAddFeed.prototype = {
'Import feeds'
]),
$.make('div', { className: 'NB-fieldset-fields' }, [
$.make('a', { href: NEWSBLUR.URLs['google-reader-authorize'], className: 'NB-google-reader-oauth NB-modal-submit-save NB-modal-submit-button' }, [
$.make('a', { href: NEWSBLUR.URLs['google-reader-authorize'], className: 'NB-google-reader-oauth NB-modal-submit-green NB-modal-submit-button' }, [
'Import from Google Reader',
$.make('img', { className: 'NB-add-google-reader-arrow', src: NEWSBLUR.Globals['MEDIA_URL']+'img/icons/silk/arrow_right.png' })
]),
@ -77,7 +77,7 @@ NEWSBLUR.ReaderAddFeed.prototype = {
$.make('form', { method: 'post', enctype: 'multipart/form-data', className: 'NB-add-form' }, [
$.make('div', { className: 'NB-loading' }),
$.make('input', { type: 'file', name: 'file', id: 'opml_file_input' }),
$.make('input', { type: 'submit', className: 'NB-add-opml-button NB-modal-submit-save', value: 'Upload OPML File' }).click(function(e) {
$.make('input', { type: 'submit', className: 'NB-add-opml-button NB-modal-submit-green', value: 'Upload OPML File' }).click(function(e) {
e.preventDefault();
self.handle_opml_upload();
return false;
@ -286,7 +286,9 @@ NEWSBLUR.ReaderAddFeed.prototype = {
if (data.code > 0) {
NEWSBLUR.reader.load_feeds();
$.modal.close();
_.defer(function() {
NEWSBLUR.reader.open_add_feed_modal();
});
} else {
var $error = $('.NB-error', '.NB-fieldset.NB-add-add-folder');
$error.text(data.message);

View file

@ -145,7 +145,7 @@ var classifier = {
this.trainer_data = data;
$begin.text('Begin Training')
.addClass('NB-modal-submit-save')
.addClass('NB-modal-submit-green')
.removeClass('NB-disabled');
},
@ -202,7 +202,7 @@ var classifier = {
(!NEWSBLUR.Globals.is_authenticated && $.make('div', { className: 'NB-trainer-not-authenticated' }, 'Please create an account and add sites you read. Then you can train them.')),
$.make('div', { className: 'NB-modal-submit' }, [
(!NEWSBLUR.Globals.is_authenticated && $.make('a', { href: '#', className: 'NB-modal-submit-close NB-modal-submit-button' }, 'Close')),
(NEWSBLUR.Globals.is_authenticated && $.make('a', { href: '#', className: 'NB-modal-submit-save NB-modal-submit-begin NB-modal-submit-button NB-disabled' }, 'Loading Training...'))
(NEWSBLUR.Globals.is_authenticated && $.make('a', { href: '#', className: 'NB-modal-submit-begin NB-modal-submit-button NB-disabled' }, 'Loading Training...'))
])
]);
@ -287,14 +287,14 @@ var classifier = {
$.make('input', { name: 'score', value: this.score, type: 'hidden' }),
$.make('input', { name: 'feed_id', value: this.feed_id, type: 'hidden' }),
$.make('a', { href: '#', className: 'NB-modal-submit-button NB-modal-submit-back' }, $.entity('&laquo;') + ' Back'),
$.make('a', { href: '#', className: 'NB-modal-submit-button NB-modal-submit-save' }, 'Save & Next '+$.entity('&raquo;')),
$.make('a', { href: '#', className: 'NB-modal-submit-button NB-modal-submit-green NB-modal-submit-save' }, 'Save & Next '+$.entity('&raquo;')),
$.make('a', { href: '#', className: 'NB-modal-submit-button NB-modal-submit-close' }, 'Close')
])),
(!this.options['training'] && $.make('div', { className: 'NB-modal-submit' }, [
$.make('input', { name: 'score', value: this.score, type: 'hidden' }),
$.make('input', { name: 'story_id', value: this.story_id, type: 'hidden' }),
$.make('input', { name: 'feed_id', value: this.feed_id, type: 'hidden' }),
$.make('input', { type: 'submit', disabled: 'true', className: 'NB-modal-submit-save NB-disabled', value: 'Check what you like above...' }),
$.make('input', { type: 'submit', disabled: 'true', className: 'NB-modal-submit-save NB-modal-submit-green NB-disabled', value: 'Check what you like above...' }),
' or ',
$.make('a', { href: '#', className: 'NB-modal-cancel' }, 'cancel')
]))
@ -356,7 +356,7 @@ var classifier = {
$.make('input', { name: 'score', value: this.score, type: 'hidden' }),
$.make('input', { name: 'story_id', value: this.story_id, type: 'hidden' }),
$.make('input', { name: 'feed_id', value: this.feed_id, type: 'hidden' }),
$.make('input', { type: 'submit', disabled: 'true', className: 'NB-modal-submit-save NB-disabled', value: 'Check what you like above...' }),
$.make('input', { type: 'submit', disabled: 'true', className: 'NB-modal-submit-save NB-modal-submit-green NB-disabled', value: 'Check what you like above...' }),
' or ',
$.make('a', { href: '#', className: 'NB-modal-cancel' }, 'cancel')
])

View file

@ -13,17 +13,20 @@ NEWSBLUR.ReaderFeedException.prototype = {
runner: function() {
this.make_modal();
this.change_retry_option_meta();
this.show_recommended_options_meta();
this.handle_cancel();
this.open_modal();
this.$modal.bind('click', $.rescope(this.handle_click, this));
NEWSBLUR.log(['Exception Modal', this.feed]);
},
make_modal: function() {
var self = this;
this.$modal = $.make('div', { className: 'NB-modal-exception NB-modal' }, [
$.make('div', { className: 'NB-modal-loading' }),
$.make('h2', { className: 'NB-modal-title' }, 'Fix a misbehaving site'),
$.make('h2', { className: 'NB-modal-subtitle' }, [
$.make('img', { className: 'NB-modal-feed-image feed_favicon', src: this.google_favicon_url + this.feed.feed_link }),
@ -38,13 +41,14 @@ NEWSBLUR.ReaderFeedException.prototype = {
$.make('div', { className: 'NB-fieldset-fields' }, [
$.make('div', [
$.make('div', { className: 'NB-loading' }),
$.make('input', { type: 'submit', value: 'Retry fetching and parsing', className: 'NB-modal-submit-save NB-modal-submit-retry' }),
$.make('input', { type: 'submit', value: 'Retry fetching and parsing', className: 'NB-modal-submit-green NB-modal-submit-retry' }),
$.make('div', { className: 'NB-error' })
])
])
]),
$.make('div', { className: 'NB-fieldset NB-exception-option NB-exception-option-link NB-modal-submit' }, [
$.make('div', { className: 'NB-fieldset NB-exception-option NB-exception-option-page NB-modal-submit' }, [
$.make('h5', [
$.make('div', { className: 'NB-exception-option-meta' }),
$.make('span', { className: 'NB-exception-option-option' }, 'Option 2:'),
'Change Website Address'
]),
@ -56,13 +60,14 @@ NEWSBLUR.ReaderFeedException.prototype = {
'Website URL: '
]),
$.make('input', { type: 'text', id: 'NB-exception-input-link', className: 'NB-exception-input-link', name: 'feed_link', value: this.feed['feed_link'] }),
$.make('input', { type: 'submit', value: 'Fetch Feed From Website', className: 'NB-modal-submit-save NB-modal-submit-link' }),
$.make('input', { type: 'submit', value: 'Fetch Feed From Website', className: 'NB-modal-submit-green NB-modal-submit-link' }),
$.make('div', { className: 'NB-error' })
])
])
]),
$.make('div', { className: 'NB-fieldset NB-exception-option NB-exception-option-address NB-modal-submit' }, [
$.make('div', { className: 'NB-fieldset NB-exception-option NB-exception-option-feed NB-modal-submit' }, [
$.make('h5', [
$.make('div', { className: 'NB-exception-option-meta' }),
$.make('span', { className: 'NB-exception-option-option' }, 'Option 3:'),
'Change RSS Feed Address'
]),
@ -74,7 +79,7 @@ NEWSBLUR.ReaderFeedException.prototype = {
'RSS/XML URL: '
]),
$.make('input', { type: 'text', id: 'NB-exception-input-address', className: 'NB-exception-input-address', name: 'feed_address', value: this.feed['feed_address'] }),
$.make('input', { type: 'submit', value: 'Parse this RSS/XML Feed', className: 'NB-modal-submit-save NB-modal-submit-address' }),
$.make('input', { type: 'submit', value: 'Parse this RSS/XML Feed', className: 'NB-modal-submit-green NB-modal-submit-address' }),
$.make('div', { className: 'NB-error' })
])
])
@ -87,7 +92,7 @@ NEWSBLUR.ReaderFeedException.prototype = {
$.make('div', { className: 'NB-fieldset-fields' }, [
$.make('div', [
$.make('div', { className: 'NB-loading' }),
$.make('input', { type: 'submit', value: 'Delete It. It Just Won\'t Work!', className: 'NB-modal-submit-save NB-modal-submit-delete' }),
$.make('input', { type: 'submit', value: 'Delete It. It Just Won\'t Work!', className: 'NB-modal-submit-red NB-modal-submit-delete' }),
$.make('div', { className: 'NB-error' })
])
])
@ -95,11 +100,30 @@ NEWSBLUR.ReaderFeedException.prototype = {
]);
},
change_retry_option_meta: function() {
var $meta = $('.NB-exception-option-retry .NB-exception-option-meta', this.$modal);
show_recommended_options_meta: function() {
var $meta_retry = $('.NB-exception-option-retry .NB-exception-option-meta', this.$modal);
var $meta_page = $('.NB-exception-option-page .NB-exception-option-meta', this.$modal);
var $meta_feed = $('.NB-exception-option-feed .NB-exception-option-meta', this.$modal);
var is_400 = (400 <= this.feed.exception_code && this.feed.exception_code < 500);
$meta.addClass('NB-exception-option-meta-recommended');
$meta.text('Recommended');
if (!is_400) {
$meta_retry.addClass('NB-exception-option-meta-recommended');
$meta_retry.text('Recommended');
return;
}
if (this.feed.exception_type == 'feed') {
$meta_page.addClass('NB-exception-option-meta-recommended');
$meta_page.text('Recommended');
}
if (this.feed.exception_type == 'page') {
if (is_400) {
$meta_feed.addClass('NB-exception-option-meta-recommended');
$meta_feed.text('Recommended');
} else {
$meta_page.addClass('NB-exception-option-meta-recommended');
$meta_page.text('Recommended');
}
}
},
open_modal: function() {
@ -141,6 +165,11 @@ NEWSBLUR.ReaderFeedException.prototype = {
save_retry_feed: function() {
var self = this;
var $loading = $('.NB-modal-loading', this.$modal);
$loading.addClass('NB-active');
$('.NB-modal-submit-retry', this.$modal).addClass('NB-disabled').attr('value', 'Fetching...');
this.model.save_exception_retry(this.feed_id, function() {
NEWSBLUR.reader.flags['has_unfetched_feeds'] = true;
NEWSBLUR.reader.force_feed_refresh();
@ -149,8 +178,11 @@ NEWSBLUR.ReaderFeedException.prototype = {
},
delete_feed: function() {
var $loading = $('.NB-modal-loading', this.$model);
var $loading = $('.NB-modal-loading', this.$modal);
$loading.addClass('NB-active');
$('.NB-modal-submit-delete', this.$modal).addClass('NB-disabled').attr('value', 'Deleting...');
var feed_id = this.feed_id;
this.model.delete_publisher(feed_id, function() {
@ -160,8 +192,11 @@ NEWSBLUR.ReaderFeedException.prototype = {
},
change_feed_address: function() {
var $loading = $('.NB-modal-loading', this.$model);
var $loading = $('.NB-modal-loading', this.$modal);
$loading.addClass('NB-active');
$('.NB-modal-submit-address', this.$modal).addClass('NB-disabled').attr('value', 'Parsing...');
var feed_id = this.feed_id;
var feed_address = $('input[name=feed_address]', this.$modal).val();
@ -175,8 +210,11 @@ NEWSBLUR.ReaderFeedException.prototype = {
},
change_feed_link: function() {
var $loading = $('.NB-modal-loading', this.$model);
var $loading = $('.NB-modal-loading', this.$modal);
$loading.addClass('NB-active');
$('.NB-modal-submit-link', this.$modal).addClass('NB-disabled').attr('value', 'Fetching...');
var feed_id = this.feed_id;
var feed_link = $('input[name=feed_link]', this.$modal).val();

View file

@ -73,13 +73,14 @@ NEWSBLUR.ReaderManageFeed.prototype = {
]),
$.make('div', { className: 'NB-fieldset' }, [
$.make('h5', 'Management'),
$.make('div', { className: 'NB-manage-management NB-fieldset-fields' }, [
$.make('div', { className: 'NB-manage-management NB-fieldset-fields NB-modal-submit' }, [
$.make('div', { className: 'NB-manage-rename' }, [
$.make('label', { className: 'NB-manage-rename-label', 'for': 'id_rename' }, "Feed Title: "),
$.make('input', { name: 'rename_title', id: 'id_rename' })
]),
$.make('input', { type: 'submit', value: 'Fetch and refresh this site', className: 'NB-modal-submit-green NB-modal-submit-retry' }),
$.make('div', { className: 'NB-manage-delete' }, [
$.make('a', { className: 'NB-delete', href: '#' }, "Delete this feed"),
$.make('input', { type: 'submit', value: 'Delete this site', className: 'NB-modal-submit-green NB-modal-submit-delete' }),
$.make('a', { className: 'NB-delete-confirm', href: '#' }, "Yes, delete this feed!"),
$.make('a', { className: 'NB-delete-cancel', href: '#' }, "cancel")
])
@ -92,7 +93,7 @@ NEWSBLUR.ReaderManageFeed.prototype = {
]),
$.make('div', { className: 'NB-modal-submit' }, [
$.make('input', { name: 'feed_id', type: 'hidden' }),
$.make('input', { type: 'submit', disabled: 'true', className: 'NB-disabled', value: 'Check what you like above...' }),
$.make('input', { type: 'submit', disabled: 'true', className: 'NB-modal-submit-save NB-modal-submit-green NB-disabled', value: 'Check what you like above...' }),
' or ',
$.make('a', { href: '#', className: 'NB-modal-cancel' }, 'cancel')
])
@ -310,7 +311,7 @@ NEWSBLUR.ReaderManageFeed.prototype = {
},
save: function() {
var $save = $('.NB-modal input[type=submit]');
var $save = $('.NB-modal-submit-save', this.$manage);
var data = this.serialize_classifier();
NEWSBLUR.reader.update_opinions(this.$manage, this.feed_id);
@ -322,6 +323,22 @@ NEWSBLUR.ReaderManageFeed.prototype = {
});
},
save_retry_feed: function() {
var self = this;
var $loading = $('.NB-modal-loading', this.$manage);
$loading.addClass('NB-active');
$('.NB-modal-submit-retry', this.$manage).addClass('NB-disabled').attr('value', 'Fetching...');
this.model.save_exception_retry(this.feed_id, function() {
NEWSBLUR.reader.force_feed_refresh(function() {
if (NEWSBLUR.reader.active_feed == self.feed_id) {
NEWSBLUR.reader.open_feed(self.feed_id, null, true);
}
$.modal.close();
}, true);
});
},
delete_feed: function() {
var $loading = $('.NB-modal-loading', this.$manage);
$loading.addClass('NB-active');
@ -336,12 +353,12 @@ NEWSBLUR.ReaderManageFeed.prototype = {
handle_click: function(elem, e) {
var self = this;
$.targetIs(e, { tagSelector: '.NB-delete' }, function($t, $p){
$.targetIs(e, { tagSelector: '.NB-modal-submit-delete' }, function($t, $p){
e.preventDefault();
var $confirm = $('.NB-delete-confirm', self.$manage);
var $cancel = $('.NB-delete-cancel', self.$manage);
var $delete = $('.NB-delete', self.$manage);
var $delete = $('.NB-modal-submit-delete', self.$manage);
$delete.animate({'opacity': 0}, {'duration': 500});
$confirm.fadeIn(500);
@ -353,7 +370,7 @@ NEWSBLUR.ReaderManageFeed.prototype = {
var $confirm = $('.NB-delete-confirm', self.$manage);
var $cancel = $('.NB-delete-cancel', self.$manage);
var $delete = $('.NB-delete', self.$manage);
var $delete = $('.NB-modal-submit-delete', self.$manage);
$delete.css({'opacity': 1});
$confirm.css({'display': 'none'});
@ -367,9 +384,15 @@ NEWSBLUR.ReaderManageFeed.prototype = {
});
$.targetIs(e, { tagSelector: 'input', childOf: '.NB-classifier' }, function($t, $p) {
var $submit = $('input[type=submit]', self.$manage);
var $submit = $('.NB-modal-submit-save', self.$manage);
$submit.removeClass("NB-disabled").removeAttr('disabled').attr('value', 'Save');
});
$.targetIs(e, { tagSelector: '.NB-modal-submit-retry' }, function($t, $p) {
e.preventDefault();
self.save_retry_feed();
});
},
handle_change: function(elem, e) {
@ -381,8 +404,8 @@ NEWSBLUR.ReaderManageFeed.prototype = {
self.load_feed_classifier();
});
$.targetIs(e, { tagSelector: 'input', childOf: '.NB-classifier' }, function($t, $p) {
var $submit = $('input[type=submit]', self.$manage);
$.targetIs(e, { tagSelector: 'input[type=checkbox]', childOf: '.NB-classifier' }, function($t, $p) {
var $submit = $('.NB-modal-submit-save', self.$manage);
$submit.removeClass("NB-disabled").removeAttr('disabled').attr('value', 'Save');
});
},
@ -392,7 +415,7 @@ NEWSBLUR.ReaderManageFeed.prototype = {
$.targetIs(e, { tagSelector: 'input', childOf: '.NB-manage-rename' }, function($t, $p) {
if ($t.val() != self.feed.feed_title) {
var $submit = $('input[type=submit]', self.$manage);
var $submit = $('.NB-modal-submit-save', self.$manage);
$submit.removeClass("NB-disabled").removeAttr('disabled').attr('value', 'Save');
}
});

View file

@ -30,7 +30,7 @@ NEWSBLUR.ReaderMarkRead.prototype = {
$.make('div', { className: 'NB-markread-slider'}),
$.make('div', { className: 'NB-markread-explanation'}),
$.make('div', { className: 'NB-modal-submit' }, [
$.make('input', { type: 'submit', className: 'NB-modal-submit-save', value: 'Do it' }),
$.make('input', { type: 'submit', className: 'NB-modal-submit-save NB-modal-submit-green', value: 'Do it' }),
' or ',
$.make('a', { href: '#', className: 'NB-modal-cancel' }, 'cancel')
])

View file

@ -186,7 +186,8 @@
</h5>
</div>
<div class="NB-module NB-module-features">
<!--
<div class="NB-module NB-module-features">
<h5 class="NB-module-header">Important information</h5>
<table class="" cellpadding="0" cellspacing="0">
@ -196,6 +197,7 @@
</tr>
</table>
</div>
-->
{% endif %}
<div class="NB-module NB-module-features">
@ -277,7 +279,7 @@
<div id="NB-feeds-list-loader">Everything is on its way...</div>
<ul class="left-center" id="feed_list"></ul>
<ul class="left-center folder" id="feed_list"></ul>
<div id="NB-progress">
<div class="NB-progress-container">

View file

@ -1,7 +1,7 @@
from pprint import pprint
from django.conf import settings
from apps.reader.models import MUserStory, UserStory
from apps.rss_feeds.models import Feed, Story, MStory, StoryAuthor, Tag
from apps.rss_feeds.models import Feed, Story, MStory, StoryAuthor, Tag, MFeedPage, FeedPage
from apps.analyzer.models import MClassifierTitle, MClassifierAuthor, MClassifierFeed, MClassifierTag
from apps.analyzer.models import ClassifierTitle, ClassifierAuthor, ClassifierFeed, ClassifierTag
import mongoengine
@ -107,7 +107,61 @@ def bootstrap_classifiers():
print "\nMongo DB classifiers: %s - %s" % (collection, mongo_classifier.objects().count())
def bootstrap_feedpages():
print "Mongo DB feed_pages: %s" % MFeedPage.objects().count()
# db.feed_pages.drop()
print "Dropped! Mongo DB feed_pages: %s" % MFeedPage.objects().count()
print "FeedPages: %s" % MFeedPage.objects().count()
pprint(db.feed_pages.index_information())
feeds = Feed.objects.all().order_by('-average_stories_per_month')
feed_count = feeds.count()
i = 0
for feed in feeds:
i += 1
print "%s/%s: %s" % (i, feed_count, feed,)
sys.stdout.flush()
if not MFeedPage.objects(feed_id=feed.pk):
feed_page = FeedPage.objects.filter(feed=feed).values()
if feed_page:
del feed_page[0]['id']
feed_page[0]['feed_id'] = feed.pk
try:
MFeedPage(**feed_page[0]).save()
except:
print '\n\n!\n\n'
continue
print "\nMongo DB feed_pages: %s" % MFeedPage.objects().count()
def compress_stories():
count = MStory.objects().count()
print "Mongo DB stories: %s" % count
p = 0.0
i = 0
feeds = Feed.objects.all().order_by('-average_stories_per_month')
feed_count = feeds.count()
f = 0
for feed in feeds:
f += 1
print "%s/%s: %s" % (f, feed_count, feed,)
sys.stdout.flush()
for story in MStory.objects(story_feed_id=feed.pk):
i += 1.0
if round(i / count * 100) != p:
p = round(i / count * 100)
print '%s%%' % p
story.save()
if __name__ == '__main__':
# bootstrap_stories()
bootstrap_userstories()
bootstrap_classifiers()
# bootstrap_userstories()
# bootstrap_classifiers()
bootstrap_feedpages()
compress_stories()

View file

@ -21,7 +21,7 @@ import pymongo
# Refresh feed code adapted from Feedjack.
# http://feedjack.googlecode.com
VERSION = '0.8'
VERSION = '0.9'
URL = 'http://www.newsblur.com/'
USER_AGENT = 'NewsBlur Fetcher %s - %s' % (VERSION, URL)
SLOWFEED_WARNING = 10
@ -51,17 +51,23 @@ class FetchFeed:
# Check if feed still needs to be updated
feed = Feed.objects.get(pk=self.feed.pk)
if feed.last_update > datetime.datetime.now() and not self.options.get('force'):
if feed.next_scheduled_update > datetime.datetime.now() and not self.options.get('force'):
log_msg = u' ---> Already fetched %s (%d)' % (self.feed.feed_title,
self.feed.id)
logging.debug(log_msg)
feed.save_feed_history(303, "Already fetched")
return FEED_SAME, None
etag=self.feed.etag
modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
if self.options.get('force'):
modified = None
etag = None
self.fpf = feedparser.parse(self.feed.feed_address,
agent=USER_AGENT,
etag=self.feed.etag,
etag=etag,
modified=modified)
return FEED_OK, self.fpf
@ -93,8 +99,7 @@ class ProcessFeed:
ENTRY_SAME:0,
ENTRY_ERR:0}
logging.debug(u' ---> [%d] Processing %s' % (self.feed.id,
self.feed.feed_title))
# logging.debug(u' ---> [%d] Processing %s' % (self.feed.id, self.feed.feed_title))
if hasattr(self.fpf, 'status'):
if self.options['verbose']:
@ -269,19 +274,17 @@ class Dispatcher:
try:
ffeed = FetchFeed(feed, self.options)
ret_feed, fetched_feed = ffeed.fetch()
delta = datetime.datetime.now() - start_time
if fetched_feed and ret_feed == FEED_OK:
if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']):
pfeed = ProcessFeed(feed, fetched_feed, db, self.options)
ret_feed, ret_entries = pfeed.process()
if ret_entries.get(ENTRY_NEW):
if ret_entries.get(ENTRY_NEW) or self.options['force']:
user_subs = UserSubscription.objects.filter(feed=feed)
for sub in user_subs:
cache.delete('usersub:%s' % sub.user_id)
sub.calculate_feed_scores(silent=True)
if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED):
if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
feed.get_stories(force=True)
except KeyboardInterrupt:
break
@ -297,15 +300,15 @@ class Dispatcher:
feed.save_feed_history(500, "Error", tb)
fetched_feed = None
if (fetched_feed and
feed.feed_link and
(ret_feed == FEED_OK or
(ret_feed == FEED_SAME and feed.stories_last_month > 10))):
if ((self.options['force']) or
(fetched_feed and
feed.feed_link and
(ret_feed == FEED_OK or
(ret_feed == FEED_SAME and feed.stories_last_month > 10)))):
page_importer = PageImporter(feed.feed_link, feed)
page_importer.fetch_page()
if not delta:
delta = datetime.datetime.now() - start_time
delta = datetime.datetime.now() - start_time
if delta.seconds > SLOWFEED_WARNING:
comment = u' (SLOW FEED!)'
else:

View file

@ -5,9 +5,8 @@ from django.utils.encoding import force_unicode
from django.utils import simplejson as json
from decimal import Decimal
from django.core import serializers
from django.http import HttpResponse
from django.http import HttpResponse, HttpResponseForbidden
from django.core.mail import mail_admins
from django.utils.translation import ugettext as _
from django.db.models.query import QuerySet
import sys
@ -115,7 +114,9 @@ def json_view(func):
response = {'result': 'error',
'text': unicode(e)}
if isinstance(response, HttpResponseForbidden):
return response
json = json_encode(response)
return HttpResponse(json, mimetype='application/json')
return wrap
@ -123,7 +124,7 @@ def json_view(func):
def main():
test = {1: True, 2: u"string", 3: 30}
json_test = json_encode(test)
# print test, json_test
print test, json_test
if __name__ == '__main__':
main()

View file

@ -11,7 +11,7 @@ def getlogger():
hdlr = logging.StreamHandler()
else:
hdlr = logging.FileHandler(settings.LOG_FILE)
formatter = logging.Formatter('[%(asctime)-12s] %(message)s','%b %d %H:%M')
formatter = logging.Formatter('[%(asctime)-12s] %(message)s','%b %d %H:%M:%S')
hdlr.setFormatter(formatter)
logger.addHandler(hdlr)

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python
from utils.munin.base import MuninGraph
from apps.rss_feeds.models import FeedFetchHistory, PageFetchHistory
from apps.rss_feeds.models import MFeedFetchHistory, MPageFetchHistory
import datetime
@ -18,10 +18,10 @@ graph_config = {
last_day = datetime.datetime.now() - datetime.timedelta(days=1)
metrics = {
'feed_errors': FeedFetchHistory.objects.filter(fetch_date__gte=last_day).exclude(status_code__in=[200, 304]).count(),
'feed_success': FeedFetchHistory.objects.filter(fetch_date__gte=last_day).filter(status_code__in=[200, 304]).count(),
'page_errors': PageFetchHistory.objects.filter(fetch_date__gte=last_day).exclude(status_code__in=[200, 304]).count(),
'page_success': PageFetchHistory.objects.filter(fetch_date__gte=last_day).filter(status_code__in=[200, 304]).count(),
'feed_errors': MFeedFetchHistory.objects(fetch_date__gte=last_day, status_code__nin=[200, 304]).count(),
'feed_success': MFeedFetchHistory.objects(fetch_date__gte=last_day, status_code__in=[200, 304]).count(),
'page_errors': MPageFetchHistory.objects(fetch_date__gte=last_day, status_code__nin=[200, 304]).count(),
'page_success': MPageFetchHistory.objects(fetch_date__gte=last_day, status_code__in=[200, 304]).count(),
}
if __name__ == '__main__':

View file

@ -3,6 +3,7 @@ import datetime
from utils.munin.base import MuninGraph
from apps.rss_feeds.models import Feed
from apps.reader.models import UserSubscription
from django.db.models import Q
graph_config = {
'graph_category' : 'NewsBlur',
@ -17,7 +18,7 @@ graph_config = {
metrics = {
'feeds': Feed.objects.count(),
'subscriptions': UserSubscription.objects.count(),
'exception_feeds': Feed.objects.filter(has_exception=True).count(),
'exception_feeds': Feed.objects.filter(Q(has_feed_exception=True) | Q(has_page_exception=True)).count(),
'update_queue': Feed.objects.filter(next_scheduled_update__lte=datetime.datetime.now()).count(),
}