diff --git a/ansible/deploy.yml b/ansible/deploy.yml
index 027a8bc7c..751c50197 100644
--- a/ansible/deploy.yml
+++ b/ansible/deploy.yml
@@ -11,5 +11,3 @@
   when: '"postgres" in group_names or "mongo" in group_names or "redis" in group_names or "elasticsearch" in group_names'
 - import_playbook: playbooks/deploy_task.yml
   when: "'task' in group_names"
-- import_playbook: playbooks/deploy_staging.yml
-  when: "'staging' in group_names"
diff --git a/ansible/playbooks/deploy_app.yml b/ansible/playbooks/deploy_app.yml
index 3fd9aeffd..aad3e79f3 100644
--- a/ansible/playbooks/deploy_app.yml
+++ b/ansible/playbooks/deploy_app.yml
@@ -5,6 +5,8 @@
   # serial: "50%"
   vars_files:
     - ../env_vars/base.yml
+  vars:
+    haproxy_host: "{{ 'hstaging' if 'staging' in inventory_hostname else 'hwww' }}"
 
   tasks:
     # - name: Leave consul
@@ -119,21 +121,150 @@
       tags:
         - static
 
-    - name: Reload gunicorn due to no git upstream changes
-      become: yes
+    - name: Start backup container on port 8001
+      docker_container:
+        name: newsblur_web_backup
+        image: newsblur/newsblur_python3
+        container_default_behavior: no_defaults
+        pull: yes
+        env:
+          DOCKERBUILD: ""
+        state: started
+        command: gunicorn --config /srv/newsblur/config/gunicorn_conf.py newsblur_web.wsgi:application
+        hostname: "{{ inventory_hostname }}"
+        log_driver: json-file
+        log_options:
+          max-size: 100m
+        networks_cli_compatible: yes
+        network_mode: default
+        networks:
+          - name: newsblurnet
+        ports:
+          - "8001:8000"
+        restart_policy: "no"
+        user: 1000:1001
+        volumes:
+          - /srv/newsblur:/srv/newsblur
+
+    - name: Wait for backup container to be healthy
+      uri:
+        url: "http://localhost:8001/_haproxychk"
+        status_code: 200
+      register: result_8001
+      retries: 10
+      delay: 5
+      until: result_8001.status == 200
+
+    - name: Update HAProxy config to use backup server
+      delegate_to: "{{ haproxy_host }}"
+      run_once: true
       block:
-        - name: Find gunicorn process
-          shell: "ps -C gunicorn fch -o pid | head -n 1"
-          register: psaux
-        - name: Reload gunicorn
-          command: "kill -HUP {{ psaux.stdout }}"
-          # Only restart if there were no changes to the git repo or the static tag was applied
-          when: not pulled.changed or "'static' in ansible_playbook_tag"
-      rescue:
-        - name: Restart Docker Container
-          command: "docker restart newsblur_web"
-      tags:
-        - static
+        - name: Generate temporary HAProxy config with backup server
+          template:
+            src: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.j2
+            dest: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
+          when: disable_consul_services_ie_staging is not defined
+          vars:
+            maintenance_mode: true
+
+        - name: Generate temporary HAProxy config with backup server
+          template:
+            src: /srv/newsblur/docker/haproxy/haproxy.staging.cfg
+            dest: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
+          when: disable_consul_services_ie_staging is defined
+          vars:
+            maintenance_mode: true
+
+        - name: Verify new HAProxy config
+          shell: docker exec haproxy haproxy -c -f /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
+          register: haproxy_verified
+          failed_when: haproxy_verified.rc != 0
+
+        - name: Replace active HAProxy config
+          command: mv /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new /srv/newsblur/docker/haproxy/haproxy.consul.cfg
+
+        - name: Reload HAProxy configuration
+          shell: "docker kill -s HUP haproxy"
+
+    - name: Wait for HAProxy to start using new configuration
+      pause:
+        seconds: 2
+
+    - name: Stop primary container
+      docker_container:
+        name: newsblur_web
+        state: stopped
+        container_default_behavior: no_defaults
+
+    - name: Deploy new container
+      docker_container:
+        name: newsblur_web
+        image: newsblur/newsblur_python3
+        container_default_behavior: no_defaults
+        pull: yes
+        env:
+          DOCKERBUILD: ""
+        state: started
+        command: gunicorn --config /srv/newsblur/config/gunicorn_conf.py newsblur_web.wsgi:application
+        hostname: "{{ inventory_hostname }}"
+        log_driver: json-file
+        log_options:
+          max-size: 100m
+        networks_cli_compatible: yes
+        network_mode: default
+        networks:
+          - name: newsblurnet
+        ports:
+          - "8000:8000"
+        restart_policy: unless-stopped
+        user: 1000:1001
+        volumes:
+          - /srv/newsblur:/srv/newsblur
+
+    - name: Wait for new container to be healthy
+      uri:
+        url: "http://localhost:8000/_haproxychk"
+        status_code: 200
+      register: result_8000
+      retries: 10
+      delay: 5
+      until: result_8000.status == 200
+
+    - name: Update HAProxy config to use new server
+      delegate_to: "{{ haproxy_host }}"
+      block:
+        - name: Generate temporary HAProxy config with new server
+          template:
+            src: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.j2
+            dest: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
+          when: disable_consul_services_ie_staging is not defined
+          vars:
+            maintenance_mode: false
+
+        - name: Generate temporary HAProxy config with new server
+          template:
+            src: /srv/newsblur/docker/haproxy/haproxy.staging.cfg
+            dest: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
+          when: disable_consul_services_ie_staging is defined
+          vars:
+            maintenance_mode: false
+
+        - name: Verify new HAProxy config
+          shell: docker exec haproxy haproxy -c -f /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
+          register: haproxy_verified
+          failed_when: haproxy_verified.rc != 0
+
+        - name: Replace active HAProxy config
+          command: mv /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new /srv/newsblur/docker/haproxy/haproxy.consul.cfg
+
+        - name: Reload HAProxy configuration
+          shell: "docker kill -s HUP haproxy"
+
+    - name: Stop and remove backup container
+      docker_container:
+        name: newsblur_web_backup
+        state: absent
+        container_default_behavior: no_defaults
 
     - name: Start Consul
       become: yes
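The hunks above replace the old HUP-gunicorn reload with a blue-green swap: bring up newsblur_web_backup on 8001, point HAProxy at it, recreate newsblur_web on 8000, point HAProxy back, then remove the backup. Both cutovers are gated on the /_haproxychk poll. A minimal Python sketch of that gate, assuming only the URL and retry settings shown in the playbook:

```python
# Sketch of the readiness gate the two "Wait for ... to be healthy" uri tasks
# implement: poll /_haproxychk until it returns 200, for up to retries * delay
# seconds. Port 8001 is the backup container; the final cutover polls 8000.
import time
import urllib.request


def wait_until_healthy(url="http://localhost:8001/_haproxychk", retries=10, delay=5):
    for _ in range(retries):
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return True
        except OSError:  # connection refused, timeout, HTTPError, etc.
            pass
        time.sleep(delay)
    return False
```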
diff --git a/ansible/roles/celery_task/tasks/main.yml b/ansible/roles/celery_task/tasks/main.yml
index e65da5438..ea1098606 100644
--- a/ansible/roles/celery_task/tasks/main.yml
+++ b/ansible/roles/celery_task/tasks/main.yml
@@ -54,7 +54,7 @@
       - /etc/hosts:/etc/hosts
   with_items:
     - container_name: "task-celery"
-      command: "celery worker -A newsblur_web --loglevel=INFO -Q new_feeds,push_feeds,update_feeds,search_indexer,discover_indexer"
+      command: "celery worker -A newsblur_web --loglevel=INFO -Q discover_indexer,search_indexer,new_feeds,push_feeds,update_feeds"
       when: "{{ inventory_hostname == 'htask-celery-10' }}"
     - container_name: "task-celery"
       command: "celery worker -A newsblur_web --loglevel=INFO -Q new_feeds,push_feeds,update_feeds,search_indexer"
diff --git a/ansible/roles/haproxy/tasks/main.yml b/ansible/roles/haproxy/tasks/main.yml
index 2262780bd..7d629590f 100644
--- a/ansible/roles/haproxy/tasks/main.yml
+++ b/ansible/roles/haproxy/tasks/main.yml
@@ -1,5 +1,4 @@
 ---
-
 - name: Allow haproxy port 80
   become: yes
   ufw: rule=allow port=80
@@ -44,7 +43,7 @@
 - name: Merge facts
   set_fact:
     updated_config: "{{ updated_config_consul.changed or updated_config_staging.changed }}"
-
+
 - name: Verify HAProxy config
   become: yes
   shell: docker exec haproxy haproxy -c -f /srv/newsblur/docker/haproxy/haproxy.consul.cfg
@@ -87,8 +86,14 @@
       - /etc/letsencrypt:/etc/letsencrypt
 
 - name: Reload haproxy
-  debug: 
+  debug:
    msg: Gracefully reloading HAProxy
   when: updated_config and haproxy_verified.rc == 0 and not docker_container.changed
   changed_when: yes
   notify: reload haproxy
+
+- name: Install socat
+  apt:
+    name: socat
+    state: present
+  become: yes
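socat is presumably being installed to talk to HAProxy's runtime API; the matching `stats socket` line is added to haproxy.staging.cfg further down in this diff. A sketch of the same conversation without socat, assuming only the socket path from that config and the standard "show stat" runtime command:

```python
# Equivalent to: echo "show stat" | socat stdio /var/run/haproxy.sock
import socket


def haproxy_cmd(cmd, sock_path="/var/run/haproxy.sock"):
    """Send one command to the HAProxy admin socket and return the reply."""
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
        sock.connect(sock_path)
        sock.sendall(cmd.encode() + b"\n")
        chunks = []
        while data := sock.recv(4096):
            chunks.append(data)
    return b"".join(chunks).decode()


print(haproxy_cmd("show stat"))  # CSV status for every frontend/backend/server
```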
diff --git a/apps/static/views.py b/apps/static/views.py
index 9b2f04df4..3f5e46e4b 100644
--- a/apps/static/views.py
+++ b/apps/static/views.py
@@ -131,3 +131,7 @@ def redis_check(request):
     if key:
         return HttpResponse(unicode(key))
     assert False, "Cannot read from redis-%s database" % pool
+
+
+def health_check(request):
+    return HttpResponse("OK")
diff --git a/config/gunicorn_conf.py b/config/gunicorn_conf.py
index b6f226127..6ac9ba055 100644
--- a/config/gunicorn_conf.py
+++ b/config/gunicorn_conf.py
@@ -12,6 +12,7 @@ GIGS_OF_MEMORY = psutil.virtual_memory().total / 1024 / 1024 / 1024.0
 NUM_CPUS = psutil.cpu_count()
 
 bind = "0.0.0.0:8000"
+pidfile = "/srv/newsblur/logs/gunicorn.pid"
 logfile = "/srv/newsblur/logs/production.log"
 accesslog = "/srv/newsblur/logs/production.log"
@@ -25,7 +26,7 @@ forwarded_allow_ips = "*"
 limit_request_line = 16000
 limit_request_fields = 1000
 worker_tmp_dir = "/dev/shm"
-reload = True
+reload = False
 
 workers = max(int(math.floor(GIGS_OF_MEMORY * 2)), 3)
@@ -34,6 +35,7 @@ if workers > 16:
 
 if os.environ.get("DOCKERBUILD", False):
     workers = 2
+    reload = True
 
 # If hostname has staging in it, only 2 workers
 if app_env and "staging" in getattr(app_env, "SERVER_NAME", ""):
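With reload now False outside Docker builds, gunicorn no longer watches the filesystem in production, which makes the container swap above the only reload path. The worker sizing is worth spelling out; a sketch, with the >16 cap value assumed since only the `if workers > 16:` context line appears in the hunk:

```python
# Sizing logic from config/gunicorn_conf.py, isolated: two workers per GB of
# RAM, never fewer than 3.
import math


def gunicorn_workers(gigs_of_memory, dockerbuild=False):
    workers = max(int(math.floor(gigs_of_memory * 2)), 3)
    if workers > 16:
        workers = 16  # assumed cap; the actual value lives outside this hunk
    if dockerbuild:
        workers = 2  # dev containers: 2 workers, and reload flips back to True
    return workers


assert gunicorn_workers(1.0) == 3    # small box floors at 3
assert gunicorn_workers(4.0) == 8    # 4 GB -> 8 workers
assert gunicorn_workers(32.0) == 16  # capped (assumed)
```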
diff --git a/docker/haproxy/haproxy.consul.cfg.j2 b/docker/haproxy/haproxy.consul.cfg.j2
index 01a7a5471..bd82ebf8a 100644
--- a/docker/haproxy/haproxy.consul.cfg.j2
+++ b/docker/haproxy/haproxy.consul.cfg.j2
@@ -25,11 +25,11 @@ defaults
     option httplog
     option redispatch
     option abortonclose
+    retries 2
     timeout connect 10s
     timeout client 10s
-    timeout server 30s
-    timeout tunnel 1h
-    retries 3
+    timeout server 10s
+    timeout queue 10s
     errorfile 502 /srv/newsblur/templates/502.http
     errorfile 503 /srv/newsblur/templates/502.http
     errorfile 504 /srv/newsblur/templates/502.http
@@ -104,26 +104,38 @@ backend nginx
 
 backend app_django
     option httpchk GET /_haproxychk
-    default-server check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
+    default-server check inter 1000ms on-error mark-down fall 2 rise 2 resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
 {% for host in groups.hdjango %}
-    server {{host}} {{host}}.node.nyc1.consul:8000
+    {% if maintenance_mode is defined and maintenance_mode %}
+    server {{host}} {{host}}.node.nyc1.consul:8001 backup
+    {% else %}
+    server {{host}} {{host}}.node.nyc1.consul:8000
+    {% endif %}
 {% endfor %}
 
 backend app_count
     balance roundrobin
     option httpchk GET /_haproxychk
-    default-server check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
+    default-server check inter 1000ms on-error mark-down fall 2 rise 2 resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
 {% for host in groups.hcount %}
-    server {{host}} {{host}}.node.nyc1.consul:8000
+    {% if maintenance_mode is defined and maintenance_mode %}
+    server {{host}} {{host}}.node.nyc1.consul:8001 backup
+    {% else %}
+    server {{host}} {{host}}.node.nyc1.consul:8000
+    {% endif %}
 {% endfor %}
     # server-template app-counts 1 _app-counts._tcp.service.nyc1.consul:8000 check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
 
 backend app_refresh
     balance roundrobin
     option httpchk GET /_haproxychk
-    default-server check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
+    default-server check inter 2000ms on-error mark-down fall 2 rise 2 resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
 {% for host in groups.hrefresh %}
-    server {{host}} {{host}}.node.nyc1.consul:8000
+    {% if maintenance_mode is defined and maintenance_mode %}
+    server {{host}} {{host}}.node.nyc1.consul:8001 backup
+    {% else %}
+    server {{host}} {{host}}.node.nyc1.consul:8000
+    {% endif %}
 {% endfor %}
     # server-template app-refresh 1 _app-refresh._tcp.service.nyc1.consul:8000 check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
 
@@ -132,7 +144,11 @@ backend app_push
     option httpchk GET /_haproxychk
     default-server check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
 {% for host in groups.hpush %}
-    server {{host}} {{host}}.node.nyc1.consul:8000
+    {% if maintenance_mode is defined and maintenance_mode %}
+    server {{host}} {{host}}.node.nyc1.consul:8001 backup
+    {% else %}
+    server {{host}} {{host}}.node.nyc1.consul:8000
+    {% endif %}
 {% endfor %}
     # server-template app-push 1 _app-push._tcp.service.nyc1.consul:8000 check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
 
@@ -182,7 +198,11 @@ backend staging
     option httpchk GET /_haproxychk
     default-server check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
 {% for host in groups.staging %}
-    server {{host}} {{host}}.node.nyc1.consul:8000
+    {% if maintenance_mode is defined and maintenance_mode %}
+    server {{host}} {{host}}.node.nyc1.consul:8001 backup
+    {% else %}
+    server {{host}} {{host}}.node.nyc1.consul:8000
+    {% endif %}
 {% endfor %}
 
 backend blog
diff --git a/docker/haproxy/haproxy.staging.cfg b/docker/haproxy/haproxy.staging.cfg
index b075ada5f..7fdf9fa82 100644
--- a/docker/haproxy/haproxy.staging.cfg
+++ b/docker/haproxy/haproxy.staging.cfg
@@ -8,6 +8,7 @@ global
     tune.ssl.default-dh-param 2048
     log 127.0.0.1 local0 notice
     # log 127.0.0.1 local1 info
+    stats socket /var/run/haproxy.sock mode 660 level admin
 
 defaults
     log global
@@ -112,17 +113,29 @@
 backend app_django_counts
     balance roundrobin
     option httpchk GET /_haproxychk
+    {% if maintenance_mode is not defined or not maintenance_mode %}
     server app_django 127.0.0.1:8000 check inter 3000ms
+    {% else %}
+    server app_django_backup 127.0.0.1:8001 check inter 3000ms backup
+    {% endif %}
 
 backend app_django_refresh
     balance roundrobin
     option httpchk GET /_haproxychk
+    {% if maintenance_mode is not defined or not maintenance_mode %}
     server app_django 127.0.0.1:8000 check inter 3000ms
+    {% else %}
+    server app_django_backup 127.0.0.1:8001 check inter 3000ms backup
+    {% endif %}
 
 backend app_django
     balance roundrobin
     option httpchk GET /_haproxychk
+    {% if maintenance_mode is not defined or not maintenance_mode %}
     server app_django 127.0.0.1:8000 check inter 3000ms
+    {% else %}
+    server app_django_backup 127.0.0.1:8001 check inter 3000ms backup
+    {% endif %}
 
 backend maintenance
     option httpchk HEAD /maintenance
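Both templates branch the same way: rendered with maintenance_mode: true, every server line targets the backup container on :8001 (marked `backup`); with false they return to :8000. A small jinja2 rendering of a trimmed stand-in stanza makes the toggle visible (the host names are hypothetical; the real template iterates Ansible's groups.hdjango, groups.hcount, groups.hrefresh, groups.hpush, and groups.staging):

```python
from jinja2 import Template

# Trimmed stand-in for one backend stanza from haproxy.consul.cfg.j2.
STANZA = """\
{% for host in groups.hdjango %}
{% if maintenance_mode is defined and maintenance_mode %}
    server {{host}} {{host}}.node.nyc1.consul:8001 backup
{% else %}
    server {{host}} {{host}}.node.nyc1.consul:8000
{% endif %}
{% endfor %}"""

groups = {"hdjango": ["happ-web-01", "happ-web-02"]}  # hypothetical hosts
print(Template(STANZA).render(groups=groups, maintenance_mode=True))
# -> every server line targets the backup container on :8001
print(Template(STANZA).render(groups=groups, maintenance_mode=False))
# -> back to the primary container on :8000
```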
include("django_prometheus.urls")), ]