diff --git a/apps/rss_feeds/migrations/0063_story_hash.py b/apps/rss_feeds/migrations/0063_story_hash.py
index 168b25595..4a7e320c8 100644
--- a/apps/rss_feeds/migrations/0063_story_hash.py
+++ b/apps/rss_feeds/migrations/0063_story_hash.py
@@ -1,17 +1,32 @@
# -*- coding: utf-8 -*-
from south.v2 import DataMigration
-from apps.rss_feeds.models import MStory, Feed
class Migration(DataMigration):
def forwards(self, orm):
+ from apps.rss_feeds.models import MStory, Feed
+ import time
+
+ batch = 0
for f in xrange(Feed.objects.latest('pk').pk):
- feed = Feed.get_by_id(f)
+ if f < batch*100000: continue
+ start = time.time()
+ try:
+ feed = Feed.get_by_id(f)
+ except Feed.DoesNotExist:
+ continue
if not feed: continue
+ cp1 = time.time() - start
+ if feed.active_premium_subscribers < 1: continue
stories = MStory.objects.filter(story_feed_id=feed.pk, story_hash__exists=False)
- print "%3s stories: %s " % (stories.count(), feed)
- for story in stories: story.save()
-
+ cp2 = time.time() - start
+ try:
+ for story in stories: story.save()
+ except Exception, e:
+ print " ***> (%s) %s" % (f, e)
+ cp3 = time.time() - start
+ print "%3s stories: %s (%s/%s/%s)" % (stories.count(), feed, round(cp1, 2), round(cp2, 2), round(cp3, 2))
+
def backwards(self, orm):
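
A hedged note on what the forwards() loop accomplishes: re-saving each story presumably lets MStory's save hook backfill the missing story_hash. A minimal sketch of the kind of derivation involved; the "feed_id:sha1(guid)" format here is an assumption, not something this diff shows:

    import hashlib

    def compute_story_hash(story_feed_id, story_guid):
        # Hypothetical: pair the feed id with a short digest of the story's
        # guid, so the hash is unique per story yet cheap to index.
        guid_hash = hashlib.sha1(story_guid).hexdigest()[:6]
        return "%s:%s" % (story_feed_id, guid_hash)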
diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py
index f9590b6cc..47976d9c9 100644
--- a/apps/rss_feeds/models.py
+++ b/apps/rss_feeds/models.py
@@ -205,13 +205,6 @@ class Feed(models.Model):
logging.debug(" ---> ~FRFound different feed (%s), merging..." % duplicate_feeds[0])
feed = Feed.get_by_id(merge_feeds(duplicate_feeds[0].pk, self.pk))
return feed
- else:
- duplicate_feeds = Feed.objects.filter(
- hash_address_and_link=self.hash_address_and_link)
- if self.pk != duplicate_feeds[0].pk:
- feed = Feed.get_by_id(merge_feeds(duplicate_feeds[0].pk, self.pk))
- return feed
- return duplicate_feeds[0]
return self
diff --git a/config/munin.conf b/config/munin.conf
index 781c685d9..294d60c72 100644
--- a/config/munin.conf
+++ b/config/munin.conf
@@ -1,8 +1,8 @@
# Configfile for Munin master
-dbdir /var/lib/munin
-htmldir /var/www/munin
-logdir /var/log/munin
-rundir /var/run/munin
+# dbdir /var/lib/munin
+# htmldir /var/www/munin
+# logdir /var/log/munin
+# rundir /var/run/munin
includedir /etc/munin/munin-conf.d
diff --git a/fabfile.py b/fabfile.py
index 7a3c4aa13..f0f084be1 100644
--- a/fabfile.py
+++ b/fabfile.py
@@ -1,5 +1,6 @@
from fabric.api import cd, env, local, parallel, serial
from fabric.api import put, run, settings, sudo
+from fabric.operations import prompt
# from fabric.colors import red, green, blue, cyan, magenta, white, yellow
from boto.s3.connection import S3Connection
from boto.s3.key import Key
@@ -77,9 +78,11 @@ env.roledefs ={
'db12.newsblur.com',
'db20.newsblur.com',
'db21.newsblur.com',
+ 'db22.newsblur.com',
],
'dbdo':['198.211.115.113',
'198.211.115.153',
+ '198.211.115.8',
],
'task': ['task01.newsblur.com',
'task02.newsblur.com',
@@ -99,12 +102,11 @@ env.roledefs ={
'ec2-50-17-12-16.compute-1.amazonaws.com',
'ec2-54-242-34-138.compute-1.amazonaws.com',
'ec2-184-73-2-61.compute-1.amazonaws.com',
-
'ec2-54-234-211-75.compute-1.amazonaws.com',
- 'ec2-50-16-97-13.compute-1.amazonaws.com',
'ec2-54-242-131-232.compute-1.amazonaws.com',
'ec2-75-101-195-131.compute-1.amazonaws.com',
'ec2-54-242-105-17.compute-1.amazonaws.com',
+ 'ec2-107-20-76-111.compute-1.amazonaws.com',
],
'vps': ['task01.newsblur.com',
'task03.newsblur.com',
@@ -823,7 +825,7 @@ def setup_memcached():
sudo('apt-get -y install memcached')
def setup_postgres(standby=False):
- shmmax = 599585856
+ shmmax = 1140047872
sudo('apt-get -y install postgresql postgresql-client postgresql-contrib libpq-dev')
put('config/postgresql%s.conf' % (
('_standby' if standby else ''),
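
For context on the shmmax bump above (599585856 to 1140047872 bytes, roughly 0.56 GiB to 1.06 GiB): kernel.shmmax caps the largest System V shared memory segment and must exceed Postgres's shared_buffers. A minimal sketch of how the value would typically be applied in the same task; these sysctl calls are an assumption, not shown in this hunk:

    # Apply the new ceiling immediately, then persist it across reboots.
    sudo('sysctl -w kernel.shmmax=%s' % shmmax)
    sudo("echo 'kernel.shmmax = %s' >> /etc/sysctl.conf" % shmmax)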
@@ -881,6 +883,7 @@ def setup_munin():
sudo('chmod u+x /etc/init.d/spawn_fcgi_munin_graph')
sudo('/etc/init.d/spawn_fcgi_munin_graph start')
sudo('update-rc.d spawn_fcgi_munin_graph defaults')
+ sudo('/etc/init.d/munin-node restart')
def setup_db_munin():
@@ -1054,12 +1057,15 @@ def setup_ec2():
# = Tasks - DB =
# ==============
-def restore_postgres(port=5432):
- backup_date = '2012-08-17-08-00'
+def restore_postgres(port=5433):
+ backup_date = '2013-01-29-09-00'
+ yes = prompt("Dropping and creating NewsBlur PGSQL db. Sure?")
+ if yes != 'y': return
# run('PYTHONPATH=%s python utils/backups/s3.py get backup_postgresql_%s.sql.gz' % (env.NEWSBLUR_PATH, backup_date))
# sudo('su postgres -c "createuser -p %s -U newsblur"' % (port,))
- sudo('su postgres -c "createdb newsblur -p %s -O newsblur"' % (port,))
- sudo('su postgres -c "pg_restore -p %s --role=newsblur --dbname=newsblur backup_postgresql_%s.sql.gz"' % (port, backup_date))
+ run('dropdb newsblur -p %s -U postgres' % (port,), pty=False)
+ run('createdb newsblur -p %s -O newsblur' % (port,), pty=False)
+ run('pg_restore -p %s --role=newsblur --dbname=newsblur /Users/sclay/Documents/backups/backup_postgresql_%s.sql.gz' % (port, backup_date), pty=False)
def restore_mongo():
backup_date = '2012-07-24-09-00'
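
The reworked restore_postgres guards its destructive drop/create/restore with Fabric's prompt(). A minimal, reusable sketch of the same guard; the helper name is illustrative, not from this diff:

    from fabric.operations import prompt

    def confirmed(action):
        # Only proceed when the operator explicitly types 'y'.
        answer = prompt("%s. Sure? (y/n)" % action, default='n')
        return answer.strip().lower() == 'y'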
diff --git a/settings.py b/settings.py
index cc53e3e69..246a9914e 100644
--- a/settings.py
+++ b/settings.py
@@ -87,8 +87,10 @@ DEVELOPMENT = NEWSBLUR_DIR.find('/Users/') == 0
# ===========================
TEMPLATE_LOADERS = (
- 'django.template.loaders.filesystem.Loader',
- 'django.template.loaders.app_directories.Loader',
+ ('django.template.loaders.cached.Loader', (
+ 'django.template.loaders.filesystem.Loader',
+ 'django.template.loaders.app_directories.Loader',
+ )),
)
TEMPLATE_CONTEXT_PROCESSORS = (
"django.contrib.auth.context_processors.auth",
diff --git a/templates/maintenance_off.html b/templates/maintenance_off.html
index 3aad43dd7..62775ceb0 100644
--- a/templates/maintenance_off.html
+++ b/templates/maintenance_off.html
@@ -77,7 +77,11 @@
After a failed replication, MongoDB is throwing a fit. Performing a repair on both primary and secondary databases. This will immediately speed up the site. Unfortunately, it takes an hour to complete. It is now past midnight on the east coast.
+Sorry for the maintenance, but today and tomorrow are going to be difficult for me. I am spinning up a new mega-huge database for MongoDB, but this will take at least an hour. I could keep NewsBlur running, but it would be extremely slow. Once the new db is up, it'll buy me some time to do proper sharding.
+Here's the deal (technical details trigger warning): I need to create a usable shard key for my MongoDB cluster. The problem is that the shard key I thought I would be using a month ago is unique but sparse. Turns out a unique index only works on a sharded collection if the shard key is a prefix of it. So I'm stuck creating a new unique index on 100GB of data. Not easy, and not quick.
+To make matters worse, the primary mongodb database machine (I'm currently running replica sets without sharding) hit 100% disk utilization two days ago, hence the ridiculous load times. It's a 32GB machine. I'm migrating over to a 96GB machine, which should immediately alleviate disk utilization and drastically lower load times. It's not true scale, but it does buy me time.
+I need the time to create a better unique index that can also be used as a viable shard key. At that point I can finally shard, and then true scale will be achieved. If I ever need more capacity, I just drop in a new set of three machines (a 32GB replica set primary, and two 16GB secondaries used for the feed fetcher's read queries).
To pass the time, go surf MLKSHK's popular page.
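
A minimal pymongo sketch of the constraint described above (a unique index that can double as the shard key); the database, collection, and field names are hypothetical, and sharding must already be enabled on the database:

    from pymongo import Connection, ASCENDING

    conn = Connection()
    # Build the unique index across the existing data; sharding on the same
    # fields keeps the unique index and the shard key compatible.
    conn.newsblur.stories.ensure_index(
        [('story_feed_id', ASCENDING), ('story_hash', ASCENDING)],
        unique=True)
    conn.admin.command(
        'shardCollection', 'newsblur.stories',
        key={'story_feed_id': ASCENDING, 'story_hash': ASCENDING})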