Mirror of https://github.com/samuelclay/NewsBlur.git
Migrating to S3 for feed pages.
parent 801fe4f20d
commit 00ba259c66
6 changed files with 82 additions and 13 deletions
@@ -564,6 +564,11 @@ def load_feed_page(request, feed_id):
         raise Http404

     feed = Feed.get_by_id(feed_id)

+    if feed.has_page and not feed.has_page_exception and feed.s3_page:
+        return HttpResponseRedirect('//%s/%s' % (settings.S3_PAGES_BUCKET_NAME,
+                                                 feed.s3_pages_key))
+
     data = MFeedPage.get_data(feed_id=feed_id)

     if not data or not feed.has_page or feed.has_page_exception:
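Note: the new branch redirects to a protocol-relative URL on the pages bucket. A minimal sketch of how that URL is assembled, assuming the S3_PAGES_BUCKET_NAME value added to settings.py later in this commit and a made-up feed id of 42:

    # Illustration only; 42 is a hypothetical feed id.
    S3_PAGES_BUCKET_NAME = 'pages.newsblur.com'
    s3_pages_key = "%s.gz.html" % 42    # mirrors the Feed.s3_pages_key property added below
    redirect_url = '//%s/%s' % (S3_PAGES_BUCKET_NAME, s3_pages_key)
    print redirect_url                  # //pages.newsblur.com/42.gz.html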
@@ -6,8 +6,10 @@ import scipy.cluster
 import urlparse
 import struct
 import operator
+import gzip
 import BmpImagePlugin, PngImagePlugin, Image
 from StringIO import StringIO
+from django.conf import settings
 from apps.rss_feeds.models import MFeedPage, MFeedIcon
 from utils.feed_functions import timelimit, TimeoutError
@@ -146,6 +148,12 @@ class IconImporter(object):
         image_file = None
         if self.page_data:
             content = self.page_data
+        elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
+            key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
+            compressed_content = key.get_contents_as_string()
+            stream = StringIO(compressed_content)
+            gz = gzip.GzipFile(fileobj=stream)
+            content = gz.read()
         else:
             content = MFeedPage.get_data(feed_id=self.feed.pk)
         url = self._url_from_html(content)
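Note: this read path undoes the gzip applied by PageImporter.save_page() further down. A sketch of the same logic as a standalone helper, with a guard for a missing object (boto's Bucket.get_key() returns None when the key does not exist, which the code above does not check); the function and argument names here are placeholders, not part of the commit:

    import gzip
    from StringIO import StringIO

    def read_s3_page(bucket, s3_pages_key):
        # bucket is a boto Bucket; returns the decompressed HTML, or None if the key is absent.
        key = bucket.get_key(s3_pages_key)
        if not key:
            return None
        compressed = key.get_contents_as_string()
        return gzip.GzipFile(fileobj=StringIO(compressed)).read()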
@@ -69,6 +69,8 @@ class Feed(models.Model):
     last_load_time = models.IntegerField(default=0)
     favicon_color = models.CharField(max_length=6, null=True, blank=True)
     favicon_not_found = models.BooleanField(default=False)
+    s3_page = models.NullBooleanField(default=False, blank=True, null=True)
+    s3_icon = models.NullBooleanField(default=False, blank=True, null=True)

     class Meta:
         db_table="feeds"
@@ -95,6 +97,11 @@ class Feed(models.Model):
             Site.objects.get_current().domain,
             self.favicon_url
         )

+    @property
+    def s3_pages_key(self):
+        return "%s.gz.html" % self.pk
+
     def canonical(self, full=False, include_favicon=True):
         feed = {
             'id': self.pk,
@@ -6,6 +6,9 @@ import feedparser
 import time
 import urllib2
 import httplib
+import gzip
+import StringIO
+from boto.s3.key import Key
 from django.conf import settings
 from utils import log as logging
 from apps.rss_feeds.models import MFeedPage
@@ -169,9 +172,32 @@ class PageImporter(object):

     def save_page(self, html):
         if html and len(html) > 100:
-            feed_page, created = MFeedPage.objects.get_or_create(feed_id=self.feed.pk,
-                                                                 auto_save=True)
+            if settings.BACKED_BY_AWS.get('pages_on_s3'):
+                k = Key(settings.S3_PAGES_BUCKET)
+                k.key = self.feed.s3_pages_key
+                k.set_metadata('Content-Encoding', 'gzip')
+                k.set_metadata('Content-Type', 'text/html')
+                k.set_metadata('Access-Control-Allow-Origin', '*')
+                out = StringIO.StringIO()
+                f = gzip.GzipFile(fileobj=out, mode='w')
+                f.write(html)
+                f.close()
+                compressed_html = out.getvalue()
+                k.set_contents_from_string(compressed_html)
+                k.set_acl('public-read')
+
+                if False and not self.feed.s3_page:
+                    try:
+                        feed_page = MFeedPage.objects.get(feed_id=self.feed.pk)
+                        feed_page.delete()
+                        logging.debug(' --->> [%-30s] ~FYTransfering page data to S3...' % (self.feed))
+                    except MFeedPage.DoesNotExist:
+                        pass
+
+                self.feed.s3_page = True
+                self.feed.save()
+            else:
+                feed_page, _ = MFeedPage.objects.get_or_create(feed_id=self.feed.pk)
             feed_page.page_data = html
             feed_page.save()

             return feed_page
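Note: the page is gzip-compressed in memory (a GzipFile writing into a StringIO buffer) and stored with Content-Encoding: gzip, so browsers that follow the load_feed_page redirect receive it transparently decompressed. The `if False and not self.feed.s3_page:` guard leaves deletion of the old MFeedPage document disabled for now. A standalone round-trip of the in-memory technique, with no S3 involved and a made-up HTML string:

    import gzip
    import StringIO

    html = '<html><body>example page</body></html>'

    # Compress, as save_page() does before uploading.
    out = StringIO.StringIO()
    f = gzip.GzipFile(fileobj=out, mode='w')
    f.write(html)
    f.close()
    compressed_html = out.getvalue()

    # Decompress, as IconImporter does after downloading.
    restored = gzip.GzipFile(fileobj=StringIO.StringIO(compressed_html)).read()
    assert restored == html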
@@ -76,19 +76,19 @@ MONGODB_SLAVE = {
     'host': '127.0.0.1'
 }

-# Celery RabbitMQ Broker
-BROKER_HOST = "127.0.0.1"
+# Celery RabbitMQ/Redis Broker
+CELERY_REDIS_HOST = "127.0.0.1"
+BROKER_URL = "redis://127.0.0.1:6379/0"

 REDIS = {
     'host': '127.0.0.1',
 }

-# AMQP - RabbitMQ server
-BROKER_HOST = "db01.newsblur.com"
-BROKER_PORT = 5672
-BROKER_USER = "newsblur"
-BROKER_PASSWORD = "newsblur"
-BROKER_VHOST = "newsblurvhost"
+BACKED_BY_AWS = {
+    'pages_on_s3': False,
+    'icons_on_s3': False,
+    'stories_on_dynamodb': False,
+}

 # ===========
 # = Logging =
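Note: the per-setting RabbitMQ configuration (BROKER_HOST/PORT/USER/PASSWORD/VHOST) is replaced by Celery's single BROKER_URL string, here pointing at Redis. The URL follows the scheme://host:port/db pattern; a sketch equivalent to the committed template value:

    # Example only: same broker as the template default above.
    CELERY_REDIS_HOST = "127.0.0.1"
    BROKER_URL = "redis://%s:6379/0" % CELERY_REDIS_HOST   # Redis on port 6379, database 0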
settings.py (23 changed lines)
@@ -3,6 +3,7 @@ import logging
 import os
 import datetime
 from mongoengine import connect
+from boto.s3.connection import S3Connection
 import redis
 from utils import jammit
@@ -409,6 +410,16 @@ FACEBOOK_SECRET = '99999999999999999999999999999999'
 TWITTER_CONSUMER_KEY = 'ooooooooooooooooooooo'
 TWITTER_CONSUMER_SECRET = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'

+# ===============
+# = AWS Backing =
+# ===============
+
+BACKED_BY_AWS = {
+    'pages_on_s3': False,
+    'icons_on_s3': False,
+    'stories_on_dynamodb': False,
+}
+
 # ==================
 # = Configurations =
 # ==================
@@ -424,6 +435,9 @@ TEMPLATE_DEBUG = DEBUG
 ACCOUNT_ACTIVATION_DAYS = 30
 AWS_ACCESS_KEY_ID = S3_ACCESS_KEY
 AWS_SECRET_ACCESS_KEY = S3_SECRET
+S3_BACKUP_BUCKET = 'newsblur_backups'
+S3_PAGES_BUCKET_NAME = 'pages.newsblur.com'
+S3_ICONS_BUCKET_NAME = 'icons.newsblur.com'

 def custom_show_toolbar(request):
     return DEBUG
@@ -470,3 +484,12 @@ if DEBUG:
     MIDDLEWARE_CLASSES += ('utils.request_introspection_middleware.DumpRequestMiddleware',)
     MIDDLEWARE_CLASSES += ('utils.exception_middleware.ConsoleExceptionMiddleware',)

+# =======
+# = AWS =
+# =======
+
+S3_CONN = None
+if BACKED_BY_AWS.get('pages_on_s3') or BACKED_BY_AWS.get('icons_on_s3'):
+    S3_CONN = S3Connection(S3_ACCESS_KEY, S3_SECRET)
+    S3_PAGES_BUCKET = S3_CONN.get_bucket(S3_PAGES_BUCKET_NAME)
+    S3_ICONS_BUCKET = S3_CONN.get_bucket(S3_ICONS_BUCKET_NAME)
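Note: the S3 connection and buckets are only opened when one of the BACKED_BY_AWS flags is on, so deployments that have not migrated pay no S3 cost at import time. Turning the new path on is a matter of overriding the flags in local settings; a sketch of such an override (example values, not the committed defaults):

    # Illustrative override of the BACKED_BY_AWS defaults.
    BACKED_BY_AWS = {
        'pages_on_s3': True,          # PageImporter.save_page() now writes gzipped pages to S3
        'icons_on_s3': False,
        'stories_on_dynamodb': False,
    }

With pages_on_s3 enabled and S3 credentials in place, load_feed_page() redirects readers of migrated feeds (those with feed.s3_page set) to pages.newsblur.com instead of serving page data out of MongoDB.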