From af0fbbdc57899c3c0bc5a868febc1e581f218e03 Mon Sep 17 00:00:00 2001 From: Samuel Clay Date: Fri, 5 Jul 2013 23:23:39 -0700 Subject: [PATCH] Updating redis conf to work with monit. --- clients/android/NewsBlur/.idea/workspace.xml | 2 +- config/redis-init | 106 ++++++++++------ config/redis.conf | 121 ++++++++++++------- templates/maintenance_off.html | 10 +- 4 files changed, 151 insertions(+), 88 deletions(-) diff --git a/clients/android/NewsBlur/.idea/workspace.xml b/clients/android/NewsBlur/.idea/workspace.xml index 1c9f3c21d..8bd691581 100644 --- a/clients/android/NewsBlur/.idea/workspace.xml +++ b/clients/android/NewsBlur/.idea/workspace.xml @@ -408,7 +408,7 @@ - + diff --git a/config/redis-init b/config/redis-init index b15ecc09c..5494c014d 100644 --- a/config/redis-init +++ b/config/redis-init @@ -1,42 +1,68 @@ -#!/bin/sh -# -# Simple Redis init.d script conceived to work on Linux systems -# as it does use of the /proc filesystem. - -REDISPORT=6379 -EXEC=/usr/local/bin/redis-server -CLIEXEC=/usr/local/bin/redis-cli - +### BEGIN INIT INFO +# Provides: redis-server +# Required-Start: $syslog $remote_fs +# Required-Stop: $syslog $remote_fs +# Should-Start: $local_fs +# Should-Stop: $local_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: redis-server - Persistent key-value db +# Description: redis-server - Persistent key-value db +### END INIT INFO + +PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin +DAEMON=/usr/local/bin/redis-server +DAEMON_ARGS=/etc/redis.conf +NAME=redis-server +DESC=redis-server PIDFILE=/var/run/redis.pid -CONF="/etc/redis.conf" - + +test -x $DAEMON || exit 0 + +set -e + case "$1" in - start) - if [ -f $PIDFILE ] - then - echo "$PIDFILE exists, process is already running or crashed" - else - echo "Starting Redis server..." - $EXEC $CONF - fi - ;; - stop) - if [ ! -f $PIDFILE ] - then - echo "$PIDFILE does not exist, process is not running" - else - PID=$(cat $PIDFILE) - echo "Stopping ..." 
- $CLIEXEC -p $REDISPORT shutdown - while [ -x /proc/${PID} ] - do - echo "Waiting for Redis to shutdown ..." - sleep 1 - done - echo "Redis stopped" - fi - ;; - *) - echo "Please use start or stop as first argument" - ;; -esac \ No newline at end of file + start) + echo -n "Starting $DESC: " + touch $PIDFILE + if start-stop-daemon --start --quiet --umask 007 --pidfile $PIDFILE --exec $DAEMON -- $DAEMON_ARGS + then + echo "$NAME." + else + echo "failed" + fi + ;; + stop) + echo -n "Stopping $DESC: " + if start-stop-daemon --stop --retry 10 --quiet --oknodo --pidfile $PIDFILE --exec $DAEMON + then + echo "$NAME." + else + echo "failed" + fi + rm -f $PIDFILE + ;; + + restart|force-reload) + ${0} stop + ${0} start + ;; + + status) + echo -n "$DESC is " + if start-stop-daemon --stop --quiet --signal 0 --name ${NAME} --pidfile ${PIDFILE} + then + echo "running" + else + echo "not running" + exit 1 + fi + ;; + + *) + echo "Usage: /etc/init.d/$NAME {start|stop|restart|force-reload}" >&2 + exit 1 + ;; +esac + +exit 0 \ No newline at end of file diff --git a/config/redis.conf b/config/redis.conf index 1680e6ee3..6ebad276f 100644 --- a/config/redis.conf +++ b/config/redis.conf @@ -39,8 +39,24 @@ port 6379 # Close the connection after a client is idle for N seconds (0 to disable) timeout 0 -# Set server verbosity to 'debug' -# it can be one of: +# TCP keepalive. +# +# If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence +# of communication. This is useful for two reasons: +# +# 1) Detect dead peers. +# 2) Take the connection alive from the point of view of network +# equipment in the middle. +# +# On Linux, the specified value (in seconds) is the period used to send ACKs. +# Note that to close the connection the double of the time is needed. +# On other kernels the period depends on the kernel configuration. +# +# A reasonable value for this option is 60 seconds. +tcp-keepalive 0 + +# Specify the server verbosity level. 
+# This can be one of: # debug (a lot of information, useful for development/testing) # verbose (many rarely useful info, but not a mess like the debug level) # notice (moderately verbose, what you want in production probably) @@ -59,7 +75,7 @@ logfile /var/log/redis.log # Specify the syslog identity. # syslog-ident redis -# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. +# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. # syslog-facility local0 # Set the number of databases. The default database is DB 0, you can select @@ -114,7 +130,7 @@ stop-writes-on-bgsave-error no # the dataset will likely be bigger if you have compressible values or keys. rdbcompression yes -# Since verison 5 of RDB a CRC64 checksum is placed at the end of the file. +# Since version 5 of RDB a CRC64 checksum is placed at the end of the file. # This makes the format more resistant to corruption but there is a performance # hit to pay (around 10%) when saving and loading RDB files, so you can disable it # for maximum performances. @@ -131,7 +147,7 @@ dbfilename dump.rdb # The DB will be written inside this directory, with the filename specified # above using the 'dbfilename' configuration directive. # -# Also the Append Only File will be created inside this directory. +# The Append Only File will also be created inside this directory. # # Note that you must specify a directory here, not a file name. dir /var/lib/redis @@ -152,14 +168,14 @@ dir /var/lib/redis # # masterauth -# When a slave lost the connection with the master, or when the replication +# When a slave loses its connection with the master, or when the replication # is still in progress, the slave can act in two different ways: # # 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will # still reply to client requests, possibly with out of date data, or the # data set may just be empty if this is the first synchronization. 
# -# 2) if slave-serve-stale data is set to 'no' the slave will reply with +# 2) if slave-serve-stale-data is set to 'no' the slave will reply with # an error "SYNC with master in progress" to all the kind of commands # but to INFO and SLAVEOF. # @@ -179,7 +195,7 @@ slave-serve-stale-data yes # such as CONFIG, DEBUG, and so forth. To a limited extend you can improve # security of read only slaves using 'rename-command' to shadow all the # administrative / dangerous commands. -slave-read-only yes +slave-read-only no # Slaves send PINGs to server in a predefined interval. It's possible to change # this interval with the repl_ping_slave_period option. The default value is 10 @@ -196,6 +212,21 @@ slave-read-only yes # # repl-timeout 60 +# Disable TCP_NODELAY on the slave socket after SYNC? +# +# If you select "yes" Redis will use a smaller number of TCP packets and +# less bandwidth to send data to slaves. But this can add a delay for +# the data to appear on the slave side, up to 40 milliseconds with +# Linux kernels using a default configuration. +# +# If you select "no" the delay for data to appear on the slave side will +# be reduced but more bandwidth will be used for replication. +# +# By default we optimize for low latency, but in very high traffic conditions +# or when the master and slaves are many hops away, turning this to "yes" may +# be a good idea. +repl-disable-tcp-nodelay no + # The slave priority is an integer number published by Redis in the INFO output. # It is used by Redis Sentinel in order to select a slave to promote into a # master if the master is no longer working correctly. @@ -230,23 +261,26 @@ slave-priority 100 # # It is possible to change the name of dangerous commands in a shared # environment. For instance the CONFIG command may be renamed into something -# of hard to guess so that it will be still available for internal-use -# tools but not available for general clients. 
+# hard to guess so that it will still be available for internal-use tools +# but not available for general clients. # # Example: # # rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 # -# It is also possible to completely kill a command renaming it into +# It is also possible to completely kill a command by renaming it into # an empty string: # # rename-command CONFIG "" +# +# Please note that changing the name of commands that are logged into the +# AOF file or transmitted to slaves may cause problems. ################################### LIMITS #################################### # Set the max number of connected clients at the same time. By default # this limit is set to 10000 clients, however if the Redis server is not -# able ot configure the process file limit to allow for the specified limit +# able to configure the process file limit to allow for the specified limit # the max number of allowed clients is set to the current file limit # minus 32 (as Redis reserves a few file descriptors for internal uses). # @@ -281,7 +315,7 @@ maxclients 100000 # maxmemory # MAXMEMORY POLICY: how Redis will select what to remove when maxmemory -# is reached? You can select among five behavior: +# is reached. You can select among five behaviors: # # volatile-lru -> remove the key with an expire set using an LRU algorithm # allkeys-lru -> remove any key accordingly to the LRU algorithm @@ -290,7 +324,7 @@ maxclients 100000 # volatile-ttl -> remove the key with the nearest expire time (minor TTL) # noeviction -> don't expire at all, just return an error on write operations # -# Note: with all the kind of policies, Redis will return an error on write +# Note: with any of the above policies, Redis will return an error on write # operations, when there are not suitable keys for eviction. # # At the date of writing this commands are: set setnx setex append @@ -346,7 +380,7 @@ appendonly no # always: fsync after every write to the append only log . Slow, Safest. 
# everysec: fsync only one time every second. Compromise. # -# The default is "everysec" that's usually the right compromise between +# The default is "everysec", as that's usually the right compromise between # speed and data safety. It's up to you to understand if you can relax this to # "no" that will let the operating system flush the output buffer when # it wants, for better performances (but if you can live with the idea of @@ -374,9 +408,9 @@ appendfsync everysec # that will prevent fsync() from being called in the main process while a # BGSAVE or BGREWRITEAOF is in progress. # -# This means that while another child is saving the durability of Redis is -# the same as "appendfsync none", that in practical terms means that it is -# possible to lost up to 30 seconds of log in the worst scenario (with the +# This means that while another child is saving, the durability of Redis is +# the same as "appendfsync none". In practical terms, this means that it is +# possible to lose up to 30 seconds of log in the worst scenario (with the # default Linux settings). # # If you have latency problems turn this to "yes". Otherwise leave it as @@ -385,10 +419,10 @@ no-appendfsync-on-rewrite no # Automatic rewrite of the append only file. # Redis is able to automatically rewrite the log file implicitly calling -# BGREWRITEAOF when the AOF log size will growth by the specified percentage. +# BGREWRITEAOF when the AOF log size grows by the specified percentage. # # This is how it works: Redis remembers the size of the AOF file after the -# latest rewrite (or if no rewrite happened since the restart, the size of +# latest rewrite (if no rewrite has happened since the restart, the size of # the AOF at startup is used). # # This base size is compared to the current size. If the current size is @@ -421,25 +455,6 @@ auto-aof-rewrite-min-size 64mb # Set it to 0 or a negative value for unlimited execution without warnings. 
lua-time-limit 1000 -################################ REDIS CLUSTER ############################### -# -# Normal Redis instances can't be part of a Redis Cluster, only nodes that are -# started as cluster nodes can. In order to start a Redis instance as a -# cluster node enable the cluster support uncommenting the following: -# -# cluster-enabled yes - -# Every cluster node has a cluster configuration file. This file is not -# intended to be edited by hand. It is created and updated by Redis nodes. -# Every Redis Cluster node requires a different cluster configuration file. -# Make sure that instances running in the same system does not have -# overlapping cluster configuration file names. -# -# cluster-config-file nodes-6379.conf - -# In order to setup your cluster make sure to read the documentation -# available at http://redis.io web site. - ################################## SLOW LOG ################################### # The Redis Slow Log is a system to log queries that exceeded a specified @@ -543,11 +558,34 @@ activerehashing yes # Instead there is a default limit for pubsub and slave clients, since # subscribers and slaves receive data in a push fashion. # -# Both the hard or the soft limit can be disabled just setting it to zero. +# Both the hard or the soft limit can be disabled by setting them to zero. client-output-buffer-limit normal 0 0 0 client-output-buffer-limit slave 1024mb 512mb 360 client-output-buffer-limit pubsub 32mb 8mb 60 +# Redis calls an internal function to perform many background tasks, like +# closing connections of clients in timeout, purging expired keys that are +# never requested, and so forth. +# +# Not all tasks are performed with the same frequency, but Redis checks for +# tasks to perform according to the specified "hz" value. +# +# By default "hz" is set to 10.
Raising the value will use more CPU when +# Redis is idle, but at the same time will make Redis more responsive when +# there are many keys expiring at the same time, and timeouts may be +# handled with more precision. +# +# The range is between 1 and 500, however a value over 100 is usually not +# a good idea. Most users should use the default of 10 and raise this up to +# 100 only in environments where very low latency is required. +hz 10 + +# When a child rewrites the AOF file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. +aof-rewrite-incremental-fsync yes + ################################## INCLUDES ################################### # Include one or more other config files here. This is useful if you @@ -555,5 +593,6 @@ client-output-buffer-limit pubsub 32mb 8mb 60 # to customize a few per-server settings. Include files can include # other files, so use this wisely. # +# include /path/to/local.conf +# include /path/to/other.conf include /etc/redis_server.conf -# include /path/to/other.conf \ No newline at end of file diff --git a/templates/maintenance_off.html b/templates/maintenance_off.html index c1a25cb58..df6118c18 100644 --- a/templates/maintenance_off.html +++ b/templates/maintenance_off.html @@ -35,9 +35,9 @@ table { background: -webkit-gradient(linear, 0% 0%, 0% 100%, from(#533754), to(#372238)); background: -moz-linear-gradient(center top , #533754 0%, #372238 100%) repeat scroll 0 0 transparent; - background: -webkit-gradient(linear, 0% 0%, 0% 100%, from(#304332), to(#172018)); +/* background: -webkit-gradient(linear, 0% 0%, 0% 100%, from(#304332), to(#172018)); background: -moz-linear-gradient(center top , #304332 0%, #172018 100%) repeat scroll 0 0 transparent; - } +*/ } #container { width: 500px } @@ -84,10 +84,8 @@

NewsBlur is in maintenance mode

-

Last night's big database migration was successful for MongoDB and Redis. However, the machine that PostgreSQL went onto is having issues. I need to make an emergency move to a bigger machine.

-

This should take about 10-15 minutes. I would ordinarily do this in the background, but because the machine is so new and is having such difficulty, it's far easier to just do a full move.

-

UPDATE +15m: Digital Ocean has just escalated my issue with their machine and the transfer is now flowing smoothly. It'll be about 10 minutes from now.

-

UPDATE +25m: Of course the problem never went away, so I'm waiting for the last 2GB in a 16GB cluster to finish up. Soon as it does, NewsBlur will be blazing fast again.

+

This will take about 10 minutes. About a week ago my primary Redis server became big enough to consume the entire machine, which is a one-way ticket to getting killed automatically by the machine. The last four outages, three lasting less than 2 minutes and this morning's bigger outage, were all caused by this server toppling over.

+

I am performing the simple fix right now by moving it to a bigger machine. I am also performing the more complicated fix by concurrently writing data with a smaller footprint to another server. But this more complicated solution takes 14 days to run and won't complete until July 15th. I was hoping that I could avoid the simple fix and just wait until the 15th, but four outages is more than enough to convince me.

To pass the time, check out what's popular on MLKSHK.