Mirror of https://github.com/samuelclay/NewsBlur.git
Migrating to S3 for feed pages.
commit 00ba259c66 (parent 801fe4f20d)
6 changed files with 82 additions and 13 deletions
@@ -564,6 +564,11 @@ def load_feed_page(request, feed_id):
         raise Http404
 
     feed = Feed.get_by_id(feed_id)
+
+    if feed.has_page and not feed.has_page_exception and feed.s3_page:
+        return HttpResponseRedirect('//%s/%s' % (settings.S3_PAGES_BUCKET_NAME,
+                                                 feed.s3_pages_key))
+
     data = MFeedPage.get_data(feed_id=feed_id)
 
     if not data or not feed.has_page or feed.has_page_exception:
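The redirect is scheme-relative: the '//' prefix makes the browser reuse whatever scheme (http or https) the NewsBlur page itself was loaded over when it fetches from the bucket. A minimal runnable sketch of the target URL, using the bucket name this commit adds to settings.py and a made-up feed id:

# Sketch only: the bucket name comes from settings.py below; the feed id is invented.
S3_PAGES_BUCKET_NAME = 'pages.newsblur.com'
feed_id = 42
s3_pages_key = '%s.gz.html' % feed_id  # mirrors the Feed.s3_pages_key property

url = '//%s/%s' % (S3_PAGES_BUCKET_NAME, s3_pages_key)
assert url == '//pages.newsblur.com/42.gz.html'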
@@ -6,8 +6,10 @@ import scipy.cluster
 import urlparse
 import struct
 import operator
+import gzip
 import BmpImagePlugin, PngImagePlugin, Image
 from StringIO import StringIO
+from django.conf import settings
 from apps.rss_feeds.models import MFeedPage, MFeedIcon
 from utils.feed_functions import timelimit, TimeoutError
 
@@ -146,6 +148,12 @@ class IconImporter(object):
         image_file = None
         if self.page_data:
             content = self.page_data
+        elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
+            key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
+            compressed_content = key.get_contents_as_string()
+            stream = StringIO(compressed_content)
+            gz = gzip.GzipFile(fileobj=stream)
+            content = gz.read()
         else:
             content = MFeedPage.get_data(feed_id=self.feed.pk)
         url = self._url_from_html(content)
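This read path mirrors PageImporter.save_page below: fetch the gzipped bytes, wrap them in a StringIO, and stream them through GzipFile. One caveat: boto's Bucket.get_key() returns None for a missing key, so a feed flagged s3_page whose upload never landed would raise AttributeError on get_contents_as_string(). A runnable sketch of the decompression, with a locally gzipped string standing in for the S3 object body:

import gzip
from StringIO import StringIO

def decompress_page(compressed_content):
    # Same three steps as the elif branch above: wrap, open as gzip, read.
    stream = StringIO(compressed_content)
    return gzip.GzipFile(fileobj=stream).read()

# Simulate key.get_contents_as_string() by gzipping a page locally.
buf = StringIO()
gz = gzip.GzipFile(fileobj=buf, mode='w')
gz.write('<html><link rel="icon" href="/favicon.ico"></html>')
gz.close()
assert 'favicon.ico' in decompress_page(buf.getvalue())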
@@ -69,6 +69,8 @@ class Feed(models.Model):
     last_load_time = models.IntegerField(default=0)
     favicon_color = models.CharField(max_length=6, null=True, blank=True)
     favicon_not_found = models.BooleanField(default=False)
+    s3_page = models.NullBooleanField(default=False, blank=True, null=True)
+    s3_icon = models.NullBooleanField(default=False, blank=True, null=True)
 
     class Meta:
         db_table="feeds"
@@ -95,6 +97,11 @@ class Feed(models.Model):
             Site.objects.get_current().domain,
             self.favicon_url
         )
+
+    @property
+    def s3_pages_key(self):
+        return "%s.gz.html" % self.pk
+
     def canonical(self, full=False, include_favicon=True):
         feed = {
             'id': self.pk,
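The S3 object key is derived entirely from the feed's primary key, so no extra state has to be stored or migrated. Note also that NullBooleanField leaves pre-existing rows as NULL, and None is just as falsey as False in the feed.s3_page checks, so unmigrated feeds silently keep the MongoDB page path. A runnable stub sketch of the key naming (FeedStub is a hypothetical stand-in; the real model needs a database):

class FeedStub(object):
    # Hypothetical stand-in for Feed, only to show the key format.
    def __init__(self, pk):
        self.pk = pk

    @property
    def s3_pages_key(self):
        return "%s.gz.html" % self.pk

assert FeedStub(42).s3_pages_key == "42.gz.html"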
@@ -6,6 +6,9 @@ import feedparser
 import time
 import urllib2
 import httplib
+import gzip
+import StringIO
+from boto.s3.key import Key
 from django.conf import settings
 from utils import log as logging
 from apps.rss_feeds.models import MFeedPage
@@ -169,9 +172,32 @@ class PageImporter(object):
 
     def save_page(self, html):
         if html and len(html) > 100:
-            feed_page, created = MFeedPage.objects.get_or_create(feed_id=self.feed.pk,
-                                                                 auto_save=True)
-            feed_page.page_data = html
-            feed_page.save()
+            if settings.BACKED_BY_AWS.get('pages_on_s3'):
+                k = Key(settings.S3_PAGES_BUCKET)
+                k.key = self.feed.s3_pages_key
+                k.set_metadata('Content-Encoding', 'gzip')
+                k.set_metadata('Content-Type', 'text/html')
+                k.set_metadata('Access-Control-Allow-Origin', '*')
+                out = StringIO.StringIO()
+                f = gzip.GzipFile(fileobj=out, mode='w')
+                f.write(html)
+                f.close()
+                compressed_html = out.getvalue()
+                k.set_contents_from_string(compressed_html)
+                k.set_acl('public-read')
+
+                if False and not self.feed.s3_page:
+                    try:
+                        feed_page = MFeedPage.objects.get(feed_id=self.feed.pk)
+                        feed_page.delete()
+                        logging.debug(' --->> [%-30s] ~FYTransfering page data to S3...' % (self.feed))
+                    except MFeedPage.DoesNotExist:
+                        pass
 
-            return feed_page
+                self.feed.s3_page = True
+                self.feed.save()
+            else:
+                feed_page, _ = MFeedPage.objects.get_or_create(feed_id=self.feed.pk)
+                feed_page.page_data = html
+                feed_page.save()
+                return feed_page
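save_page gzips the page in memory and uploads the compressed bytes with Content-Encoding: gzip, so browsers transparently decompress what S3 serves; the Access-Control-Allow-Origin metadata lets NewsBlur's own JavaScript fetch the page cross-origin; and in boto, metadata must be set before set_contents_from_string() or it never reaches S3, which is why the k.set_metadata() calls come first. The `if False and ...` block deliberately disables the MongoDB cleanup for now. A runnable sketch of the upload sequence against a fake in-memory key (FakeKey is invented for illustration; the real class is boto.s3.key.Key):

import gzip
import StringIO

class FakeKey(object):
    # Invented in-memory stand-in for boto.s3.key.Key.
    def __init__(self):
        self.metadata, self.contents, self.acl = {}, None, None
    def set_metadata(self, name, value):
        self.metadata[name] = value
    def set_contents_from_string(self, data):
        self.contents = data
    def set_acl(self, acl):
        self.acl = acl

out = StringIO.StringIO()
f = gzip.GzipFile(fileobj=out, mode='w')
f.write('<html><body>a feed page</body></html>')
f.close()

k = FakeKey()
k.set_metadata('Content-Encoding', 'gzip')  # before the upload, as in boto
k.set_metadata('Content-Type', 'text/html')
k.set_contents_from_string(out.getvalue())
k.set_acl('public-read')
assert k.contents[:2] == '\x1f\x8b'  # gzip magic bytes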
@@ -76,19 +76,19 @@ MONGODB_SLAVE = {
     'host': '127.0.0.1'
 }
 
-# Celery RabbitMQ Broker
-BROKER_HOST = "127.0.0.1"
+# Celery RabbitMQ/Redis Broker
+CELERY_REDIS_HOST = "127.0.0.1"
+BROKER_URL = "redis://127.0.0.1:6379/0"
 
 REDIS = {
     'host': '127.0.0.1',
 }
 
-# AMQP - RabbitMQ server
-BROKER_HOST = "db01.newsblur.com"
-BROKER_PORT = 5672
-BROKER_USER = "newsblur"
-BROKER_PASSWORD = "newsblur"
-BROKER_VHOST = "newsblurvhost"
+BACKED_BY_AWS = {
+    'pages_on_s3': False,
+    'icons_on_s3': False,
+    'stories_on_dynamodb': False,
+}
 
 # ===========
 # = Logging =
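Two swaps in the settings template: Celery moves off the RabbitMQ BROKER_* settings onto a single Redis BROKER_URL in the standard redis://host:port/db form, and the new BACKED_BY_AWS flags default everything to off so a stock development install never touches S3 or DynamoDB. A hypothetical override for a box with a non-local Redis:

# Hypothetical host name, matching the BROKER_URL scheme above.
CELERY_REDIS_HOST = "redis01.example.com"
BROKER_URL = "redis://%s:6379/0" % CELERY_REDIS_HOST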
23 settings.py
@@ -3,6 +3,7 @@ import logging
 import os
 import datetime
 from mongoengine import connect
+from boto.s3.connection import S3Connection
 import redis
 from utils import jammit
 
@@ -409,6 +410,16 @@ FACEBOOK_SECRET = '99999999999999999999999999999999'
 TWITTER_CONSUMER_KEY = 'ooooooooooooooooooooo'
 TWITTER_CONSUMER_SECRET = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
 
+# ===============
+# = AWS Backing =
+# ===============
+
+BACKED_BY_AWS = {
+    'pages_on_s3': False,
+    'icons_on_s3': False,
+    'stories_on_dynamodb': False,
+}
+
 # ==================
 # = Configurations =
 # ==================
@@ -424,6 +435,9 @@ TEMPLATE_DEBUG = DEBUG
 ACCOUNT_ACTIVATION_DAYS = 30
 AWS_ACCESS_KEY_ID = S3_ACCESS_KEY
 AWS_SECRET_ACCESS_KEY = S3_SECRET
+S3_BACKUP_BUCKET = 'newsblur_backups'
+S3_PAGES_BUCKET_NAME = 'pages.newsblur.com'
+S3_ICONS_BUCKET_NAME = 'icons.newsblur.com'
 
 def custom_show_toolbar(request):
     return DEBUG
@@ -470,3 +484,12 @@ if DEBUG:
     MIDDLEWARE_CLASSES += ('utils.request_introspection_middleware.DumpRequestMiddleware',)
     MIDDLEWARE_CLASSES += ('utils.exception_middleware.ConsoleExceptionMiddleware',)
 
+# =======
+# = AWS =
+# =======
+
+S3_CONN = None
+if BACKED_BY_AWS.get('pages_on_s3') or BACKED_BY_AWS.get('icons_on_s3'):
+    S3_CONN = S3Connection(S3_ACCESS_KEY, S3_SECRET)
+    S3_PAGES_BUCKET = S3_CONN.get_bucket(S3_PAGES_BUCKET_NAME)
+    S3_ICONS_BUCKET = S3_CONN.get_bucket(S3_ICONS_BUCKET_NAME)
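The connection is opened at import time only when at least one S3-backed feature is switched on, so settings with both flags False need no boto credentials or network access. Since boto's get_bucket() validates the bucket with a request by default, a typo'd bucket name fails at startup rather than at the first upload. A hedged sketch of what flipping one flag in local settings implies (same names as defined above):

BACKED_BY_AWS = {
    'pages_on_s3': True,   # settings.py now opens S3_CONN and resolves
    'icons_on_s3': False,  # both buckets at import time
    'stories_on_dynamodb': False,
}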