Adding cached template loader. Adding new mongodb.

Samuel Clay 2013-03-22 13:36:32 -07:00
parent c4b6bb6fd8
commit e91d11e068
6 changed files with 46 additions and 26 deletions

@@ -1,17 +1,32 @@
 # -*- coding: utf-8 -*-
 from south.v2 import DataMigration
-from apps.rss_feeds.models import MStory, Feed

 class Migration(DataMigration):

     def forwards(self, orm):
+        from apps.rss_feeds.models import MStory, Feed
+        import time
+        batch = 0
         for f in xrange(Feed.objects.latest('pk').pk):
-            feed = Feed.get_by_id(f)
+            if f < batch*100000: continue
+            start = time.time()
+            try:
+                feed = Feed.get_by_id(f)
+            except Feed.DoesNotExist:
+                continue
             if not feed: continue
+            cp1 = time.time() - start
             if feed.active_premium_subscribers < 1: continue
             stories = MStory.objects.filter(story_feed_id=feed.pk, story_hash__exists=False)
-            print "%3s stories: %s " % (stories.count(), feed)
-            for story in stories: story.save()
+            cp2 = time.time() - start
+            try:
+                for story in stories: story.save()
+            except Exception, e:
+                print " ***> (%s) %s" % (f, e)
+            cp3 = time.time() - start
+            print "%3s stories: %s (%s/%s/%s)" % (stories.count(), feed, round(cp1, 2), round(cp2, 2), round(cp3, 2))

     def backwards(self, orm):
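For context, a hedged note on why re-saving each story back-fills the new field: the assumption (not shown in this diff) is that MStory.save() in apps/rss_feeds/models.py derives story_hash from the feed id and the story's guid whenever it is missing, along these lines:

# Illustration only -- guid_hash() is hypothetical and the real MStory.save()
# may compute the hash differently.
import hashlib

def guid_hash(story_feed_id, story_guid):
    # Namespace a short digest of the story guid by the owning feed's id.
    return "%s:%s" % (story_feed_id, hashlib.sha1(story_guid).hexdigest()[:6])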

@@ -205,13 +205,6 @@ class Feed(models.Model):
                 logging.debug(" ---> ~FRFound different feed (%s), merging..." % duplicate_feeds[0])
                 feed = Feed.get_by_id(merge_feeds(duplicate_feeds[0].pk, self.pk))
                 return feed
-            else:
-                duplicate_feeds = Feed.objects.filter(
-                    hash_address_and_link=self.hash_address_and_link)
-                if self.pk != duplicate_feeds[0].pk:
-                    feed = Feed.get_by_id(merge_feeds(duplicate_feeds[0].pk, self.pk))
-                    return feed
-                return duplicate_feeds[0]

         return self

@@ -1,8 +1,8 @@
 # Configfile for Munin master

-dbdir /var/lib/munin
-htmldir /var/www/munin
-logdir /var/log/munin
-rundir /var/run/munin
+# dbdir /var/lib/munin
+# htmldir /var/www/munin
+# logdir /var/log/munin
+# rundir /var/run/munin

 includedir /etc/munin/munin-conf.d

fabfile.py

@@ -1,5 +1,6 @@
 from fabric.api import cd, env, local, parallel, serial
 from fabric.api import put, run, settings, sudo
 from fabric.operations import prompt
+# from fabric.colors import red, green, blue, cyan, magenta, white, yellow
 from boto.s3.connection import S3Connection
 from boto.s3.key import Key
@@ -77,9 +78,11 @@ env.roledefs ={
             'db12.newsblur.com',
             'db20.newsblur.com',
             'db21.newsblur.com',
+            'db22.newsblur.com',
             ],
    'dbdo':['198.211.115.113',
            '198.211.115.153',
            '198.211.115.8',
            ],
    'task': ['task01.newsblur.com',
            'task02.newsblur.com',
@@ -99,12 +102,11 @@ env.roledefs ={
            'ec2-50-17-12-16.compute-1.amazonaws.com',
            'ec2-54-242-34-138.compute-1.amazonaws.com',
            'ec2-184-73-2-61.compute-1.amazonaws.com',
            'ec2-54-234-211-75.compute-1.amazonaws.com',
            'ec2-50-16-97-13.compute-1.amazonaws.com',
            'ec2-54-242-131-232.compute-1.amazonaws.com',
            'ec2-75-101-195-131.compute-1.amazonaws.com',
            'ec2-54-242-105-17.compute-1.amazonaws.com',
            'ec2-107-20-76-111.compute-1.amazonaws.com',
            ],
    'vps': ['task01.newsblur.com',
            'task03.newsblur.com',
@@ -823,7 +825,7 @@ def setup_memcached():
     sudo('apt-get -y install memcached')

 def setup_postgres(standby=False):
-    shmmax = 599585856
+    shmmax = 1140047872
     sudo('apt-get -y install postgresql postgresql-client postgresql-contrib libpq-dev')
     put('config/postgresql%s.conf' % (
         ('_standby' if standby else ''),
@@ -881,6 +883,7 @@ def setup_munin():
     sudo('chmod u+x /etc/init.d/spawn_fcgi_munin_graph')
     sudo('/etc/init.d/spawn_fcgi_munin_graph start')
     sudo('update-rc.d spawn_fcgi_munin_graph defaults')
+    sudo('/etc/init.d/munin-node restart')

 def setup_db_munin():
@@ -1054,12 +1057,15 @@ def setup_ec2():
 # = Tasks - DB =
 # ==============

-def restore_postgres(port=5432):
-    backup_date = '2012-08-17-08-00'
+def restore_postgres(port=5433):
+    backup_date = '2013-01-29-09-00'
     yes = prompt("Dropping and creating NewsBlur PGSQL db. Sure?")
     if yes != 'y': return
     # run('PYTHONPATH=%s python utils/backups/s3.py get backup_postgresql_%s.sql.gz' % (env.NEWSBLUR_PATH, backup_date))
     # sudo('su postgres -c "createuser -p %s -U newsblur"' % (port,))
-    sudo('su postgres -c "createdb newsblur -p %s -O newsblur"' % (port,))
-    sudo('su postgres -c "pg_restore -p %s --role=newsblur --dbname=newsblur backup_postgresql_%s.sql.gz"' % (port, backup_date))
+    run('dropdb newsblur -p %s -U postgres' % (port,), pty=False)
+    run('createdb newsblur -p %s -O newsblur' % (port,), pty=False)
+    run('pg_restore -p %s --role=newsblur --dbname=newsblur /Users/sclay/Documents/backups/backup_postgresql_%s.sql.gz' % (port, backup_date), pty=False)

 def restore_mongo():
     backup_date = '2012-07-24-09-00'

@@ -87,8 +87,10 @@ DEVELOPMENT = NEWSBLUR_DIR.find('/Users/') == 0
 # ===========================

 TEMPLATE_LOADERS = (
-    'django.template.loaders.filesystem.Loader',
-    'django.template.loaders.app_directories.Loader',
+    ('django.template.loaders.cached.Loader', (
+        'django.template.loaders.filesystem.Loader',
+        'django.template.loaders.app_directories.Loader',
+    )),
 )

 TEMPLATE_CONTEXT_PROCESSORS = (
     "django.contrib.auth.context_processors.auth",

@@ -77,7 +77,11 @@
         <img src="/media/img/logo_512.png" class="logo">
         <h1>NewsBlur is in <span class="error404">maintenance mode</span></h1>
         <div class="description">
-            <p>After a failed replication, MongoDB is throwing a fit. Performing a repair on both primary and secondary databases. This will immediately speed up the site. Unfortunately, it takes an hour to complete. It is now past midnight on the east coast.</p>
+            <p>Sorry for the maintenance, but today and tomorrow are going to be difficult for me. I am spinning up a new mega-huge database for MongoDB, but this will take at least an hour. I could keep NewsBlur running, but it would be extremely slow. Once the new db is up, it'll buy me some time to do proper sharding.</p>
+            <p>Here's the deal (technical details trigger warning): I need to create a usable shard key for my MongoDB cluster. The problem is that the shard key I thought I would be using a month ago is unique but sparse. It turns out any unique index has to include the shard key as a prefix. So I'm stuck creating a new unique index on 100GB of data. Not easy, and not quick.</p>
+            <p>To make matters worse, the primary MongoDB database machine (I'm currently running replica sets without sharding) hit 100% disk utilization two days ago, hence the ridiculous load times. It's a 32GB machine. I'm migrating over to a 96GB machine, which should immediately alleviate disk utilization and drastically lower load times. It's not true scale, but it does buy me time.</p>
+            <p>I need the time to create a better unique index that can also be used as a viable shard key. At that point I can finally shard, and then true scale will be achieved. If I ever need more capacity, I just drop in a new set of three machines (a 32GB replica set primary and two 16GB secondaries used for the feed fetcher's read queries).</p>
             <p>To pass the time, go surf <a href="http://mlkshk.com/popular">MLKSHK's popular page</a>.</p>
             <p></p>
         </div>
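As an aside, a minimal sketch of the constraint described in the maintenance note, using pymongo with assumed names (the newsblur database, a stories collection, and a story_hash field are guesses, as is db22.newsblur.com being the "new mongodb" from the commit title): a unique index must include the shard key as a prefix, so the index is built first and the collection is then sharded on that same field.

# Hypothetical illustration only; the sharding commands would have to be run
# through a mongos router, not directly against a shard member.
from pymongo import MongoClient, ASCENDING

client = MongoClient('db22.newsblur.com', 27017)

# 1. Build the unique index whose leading field will double as the shard key.
client.newsblur.stories.create_index([('story_hash', ASCENDING)],
                                     unique=True, background=True)

# 2. Enable sharding on the database, then shard the collection on that prefix.
client.admin.command('enableSharding', 'newsblur')
client.admin.command('shardCollection', 'newsblur.stories',
                     key={'story_hash': ASCENDING})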