Adding cached template loader. Adding new mongodb.

Samuel Clay 2013-03-22 13:36:32 -07:00
parent c4b6bb6fd8
commit e91d11e068
6 changed files with 46 additions and 26 deletions

@@ -1,17 +1,32 @@
 # -*- coding: utf-8 -*-
 from south.v2 import DataMigration
-from apps.rss_feeds.models import MStory, Feed

 class Migration(DataMigration):

     def forwards(self, orm):
+        from apps.rss_feeds.models import MStory, Feed
+        import time
+        batch = 0
         for f in xrange(Feed.objects.latest('pk').pk):
-            feed = Feed.get_by_id(f)
+            if f < batch*100000: continue
+            start = time.time()
+            try:
+                feed = Feed.get_by_id(f)
+            except Feed.DoesNotExist:
+                continue
             if not feed: continue
+            cp1 = time.time() - start
             if feed.active_premium_subscribers < 1: continue
             stories = MStory.objects.filter(story_feed_id=feed.pk, story_hash__exists=False)
-            print "%3s stories: %s " % (stories.count(), feed)
-            for story in stories: story.save()
+            cp2 = time.time() - start
+            try:
+                for story in stories: story.save()
+            except Exception, e:
+                print " ***> (%s) %s" % (f, e)
+            cp3 = time.time() - start
+            print "%3s stories: %s (%s/%s/%s)" % (stories.count(), feed, round(cp1, 2), round(cp2, 2), round(cp3, 2))

     def backwards(self, orm):
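For context, a hedged note on why re-saving each story back-fills the new field: the assumption (not shown in this diff) is that MStory.save() in apps/rss_feeds/models.py derives story_hash from the feed id and the story's guid whenever it is missing, along these lines:

# Illustration only -- guid_hash() is hypothetical and the real MStory.save()
# may compute the hash differently.
import hashlib

def guid_hash(story_feed_id, story_guid):
    # Namespace a short digest of the story guid by the owning feed's id.
    return "%s:%s" % (story_feed_id, hashlib.sha1(story_guid).hexdigest()[:6])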

@@ -205,13 +205,6 @@ class Feed(models.Model):
                 logging.debug(" ---> ~FRFound different feed (%s), merging..." % duplicate_feeds[0])
                 feed = Feed.get_by_id(merge_feeds(duplicate_feeds[0].pk, self.pk))
                 return feed
-            else:
-                duplicate_feeds = Feed.objects.filter(
-                    hash_address_and_link=self.hash_address_and_link)
-                if self.pk != duplicate_feeds[0].pk:
-                    feed = Feed.get_by_id(merge_feeds(duplicate_feeds[0].pk, self.pk))
-                    return feed
-                return duplicate_feeds[0]

         return self

@@ -1,8 +1,8 @@
 # Configfile for Munin master

-dbdir /var/lib/munin
-htmldir /var/www/munin
-logdir /var/log/munin
-rundir /var/run/munin
+# dbdir /var/lib/munin
+# htmldir /var/www/munin
+# logdir /var/log/munin
+# rundir /var/run/munin

 includedir /etc/munin/munin-conf.d

fabfile.py

@@ -1,5 +1,6 @@
 from fabric.api import cd, env, local, parallel, serial
 from fabric.api import put, run, settings, sudo
 from fabric.operations import prompt
+# from fabric.colors import red, green, blue, cyan, magenta, white, yellow
 from boto.s3.connection import S3Connection
 from boto.s3.key import Key
@@ -77,9 +78,11 @@ env.roledefs ={
             'db12.newsblur.com',
             'db20.newsblur.com',
             'db21.newsblur.com',
+            'db22.newsblur.com',
             ],
    'dbdo':['198.211.115.113',
            '198.211.115.153',
            '198.211.115.8',
            ],
    'task': ['task01.newsblur.com',
            'task02.newsblur.com',
@@ -99,12 +102,11 @@ env.roledefs ={
            'ec2-50-17-12-16.compute-1.amazonaws.com',
            'ec2-54-242-34-138.compute-1.amazonaws.com',
            'ec2-184-73-2-61.compute-1.amazonaws.com',
            'ec2-54-234-211-75.compute-1.amazonaws.com',
            'ec2-50-16-97-13.compute-1.amazonaws.com',
            'ec2-54-242-131-232.compute-1.amazonaws.com',
            'ec2-75-101-195-131.compute-1.amazonaws.com',
            'ec2-54-242-105-17.compute-1.amazonaws.com',
            'ec2-107-20-76-111.compute-1.amazonaws.com',
            ],
    'vps': ['task01.newsblur.com',
            'task03.newsblur.com',
@@ -823,7 +825,7 @@ def setup_memcached():
     sudo('apt-get -y install memcached')

 def setup_postgres(standby=False):
-    shmmax = 599585856
+    shmmax = 1140047872
     sudo('apt-get -y install postgresql postgresql-client postgresql-contrib libpq-dev')
     put('config/postgresql%s.conf' % (
         ('_standby' if standby else ''),
@@ -881,6 +883,7 @@ def setup_munin():
     sudo('chmod u+x /etc/init.d/spawn_fcgi_munin_graph')
     sudo('/etc/init.d/spawn_fcgi_munin_graph start')
     sudo('update-rc.d spawn_fcgi_munin_graph defaults')
+    sudo('/etc/init.d/munin-node restart')

 def setup_db_munin():
@@ -1054,12 +1057,15 @@ def setup_ec2():
 # = Tasks - DB =
 # ==============

-def restore_postgres(port=5432):
-    backup_date = '2012-08-17-08-00'
+def restore_postgres(port=5433):
+    backup_date = '2013-01-29-09-00'
     yes = prompt("Dropping and creating NewsBlur PGSQL db. Sure?")
     if yes != 'y': return
     # run('PYTHONPATH=%s python utils/backups/s3.py get backup_postgresql_%s.sql.gz' % (env.NEWSBLUR_PATH, backup_date))
     # sudo('su postgres -c "createuser -p %s -U newsblur"' % (port,))
-    sudo('su postgres -c "createdb newsblur -p %s -O newsblur"' % (port,))
-    sudo('su postgres -c "pg_restore -p %s --role=newsblur --dbname=newsblur backup_postgresql_%s.sql.gz"' % (port, backup_date))
+    run('dropdb newsblur -p %s -U postgres' % (port,), pty=False)
+    run('createdb newsblur -p %s -O newsblur' % (port,), pty=False)
+    run('pg_restore -p %s --role=newsblur --dbname=newsblur /Users/sclay/Documents/backups/backup_postgresql_%s.sql.gz' % (port, backup_date), pty=False)

 def restore_mongo():
     backup_date = '2012-07-24-09-00'

@@ -87,8 +87,10 @@ DEVELOPMENT = NEWSBLUR_DIR.find('/Users/') == 0
 # ===========================

 TEMPLATE_LOADERS = (
-    'django.template.loaders.filesystem.Loader',
-    'django.template.loaders.app_directories.Loader',
+    ('django.template.loaders.cached.Loader', (
+        'django.template.loaders.filesystem.Loader',
+        'django.template.loaders.app_directories.Loader',
+    )),
 )

 TEMPLATE_CONTEXT_PROCESSORS = (
     "django.contrib.auth.context_processors.auth",

@@ -77,7 +77,11 @@
         <img src="/media/img/logo_512.png" class="logo">
         <h1>NewsBlur is in <span class="error404">maintenance mode</span></h1>
         <div class="description">
-            <p>After a failed replication, MongoDB is throwing a fit. Performing a repair on both primary and secondary databases. This will immediately speed up the site. Unfortunately, it takes an hour to complete. It is now past midnight on the east coast.</p>
+            <p>Sorry for the maintenance, but today and tomorrow are going to be difficult for me. I am spinning up a new mega-huge database for MongoDB, but this will take at least an hour. I could keep NewsBlur running, but it would be extremely slow. Once the new db is up, it'll buy me some time to do proper sharding.</p>
+            <p>Here's the deal (technical details trigger warning): I need to create a usable shard key for my MongoDB cluster. The problem is that the shard key I thought I would be using a month ago is unique but sparse. It turns out any unique index has to include the shard key as a prefix. So I'm stuck creating a new unique index on 100GB of data. Not easy, and not quick.</p>
+            <p>To make matters worse, the primary MongoDB database machine (I'm currently running replica sets without sharding) hit 100% disk utilization two days ago, hence the ridiculous load times. It's a 32GB machine. I'm migrating over to a 96GB machine, which should immediately alleviate disk utilization and drastically lower load times. It's not true scale, but it does buy me time.</p>
+            <p>I need the time to create a better unique index that can also be used as a viable shard key. At that point I can finally shard, and then true scale will be achieved. If I ever need more capacity, I just drop in a new set of three machines (a 32GB replica set primary and two 16GB secondaries used for the feed fetcher's read queries).</p>
             <p>To pass the time, go surf <a href="http://mlkshk.com/popular">MLKSHK's popular page</a>.</p>
             <p></p>
         </div>
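As an aside, a minimal sketch of the constraint described in the maintenance note, using pymongo with assumed names (the newsblur database, a stories collection, and a story_hash field are guesses, as is db22.newsblur.com being the "new mongodb" from the commit title): a unique index must include the shard key as a prefix, so the index is built first and the collection is then sharded on that same field.

# Hypothetical illustration only; the sharding commands would have to be run
# through a mongos router, not directly against a shard member.
from pymongo import MongoClient, ASCENDING

client = MongoClient('db22.newsblur.com', 27017)

# 1. Build the unique index whose leading field will double as the shard key.
client.newsblur.stories.create_index([('story_hash', ASCENDING)],
                                     unique=True, background=True)

# 2. Enable sharding on the database, then shard the collection on that prefix.
client.admin.command('enableSharding', 'newsblur')
client.admin.command('shardCollection', 'newsblur.stories',
                     key={'story_hash': ASCENDING})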