Merge branch 'blue-green-deploy'

* blue-green-deploy:
  HAProxy routine during deploy should only run once.
  Blue green deploys are working!
  Working but slow due to having to indeterminately wait for haproxy to settle.
  gunicorn can still run on port 8000, docker will translate the port.
  Fixing docker commands, adding health check.
  Blue green deploy, doesn't work yet.
Samuel Clay 2025-02-02 21:26:30 -08:00
commit 934caff501
9 changed files with 206 additions and 32 deletions
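
The diff below wires a blue-green swap into the app deploy: the currently running image stays alive as a backup container on port 8001, HAProxy is repointed at it while the primary container is replaced, then traffic is cut back over. A condensed sketch of the sequence, using the container names and paths from the diff (a sketch of the flow, not the exact commands the Ansible modules emit):

    # blue: keep the old code serving as a backup on host port 8001
    docker run -d --name newsblur_web_backup -p 8001:8000 newsblur/newsblur_python3
    # wait until the backup answers health checks, then point HAProxy at :8001
    until curl -fsS http://localhost:8001/_haproxychk; do sleep 5; done
    # green: recreate newsblur_web on :8000 with the new image, wait for health,
    # point HAProxy back at :8000, and remove the backup container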

View file

@@ -11,5 +11,3 @@
when: '"postgres" in group_names or "mongo" in group_names or "redis" in group_names or "elasticsearch" in group_names'
- import_playbook: playbooks/deploy_task.yml
when: "'task' in group_names"
- import_playbook: playbooks/deploy_staging.yml
when: "'staging' in group_names"

View file

@@ -5,6 +5,8 @@
# serial: "50%"
vars_files:
- ../env_vars/base.yml
vars:
haproxy_host: "{{ 'hstaging' if 'staging' in inventory_hostname else 'hwww' }}"
tasks:
# - name: Leave consul
@@ -119,21 +121,150 @@
tags:
- static
- name: Reload gunicorn due to no git upstream changes
become: yes
- name: Start backup container on port 8001
docker_container:
name: newsblur_web_backup
image: newsblur/newsblur_python3
container_default_behavior: no_defaults
pull: yes
env:
DOCKERBUILD: ""
state: started
command: gunicorn --config /srv/newsblur/config/gunicorn_conf.py newsblur_web.wsgi:application
hostname: "{{ inventory_hostname }}"
log_driver: json-file
log_options:
max-size: 100m
networks_cli_compatible: yes
network_mode: default
networks:
- name: newsblurnet
ports:
- "8001:8000"
restart_policy: "no"
user: 1000:1001
volumes:
- /srv/newsblur:/srv/newsblur
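
For orientation, the docker_container task above corresponds roughly to the invocation below; the key point (per the commit message) is that gunicorn keeps binding 0.0.0.0:8000 inside the container and Docker translates it to host port 8001 (a sketch, not the module's exact flags):

    docker run -d --name newsblur_web_backup \
      --network newsblurnet --hostname "$(hostname)" \
      --user 1000:1001 -e DOCKERBUILD= \
      --log-driver json-file --log-opt max-size=100m \
      -v /srv/newsblur:/srv/newsblur \
      -p 8001:8000 \
      newsblur/newsblur_python3 \
      gunicorn --config /srv/newsblur/config/gunicorn_conf.py newsblur_web.wsgi:application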
- name: Wait for backup container to be healthy
uri:
url: "http://localhost:8001/_haproxychk"
status_code: 200
register: result_8001
retries: 10
delay: 5
until: result_8001.status == 200
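
The uri/retries/until combination above is effectively this polling loop (shell equivalent, assuming curl is available on the host):

    # matches retries: 10 / delay: 5 on the task above
    for i in $(seq 1 10); do
      curl -fsS http://localhost:8001/_haproxychk && break
      sleep 5
    done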
- name: Update HAProxy config to use backup server
delegate_to: "{{ haproxy_host }}"
run_once: true
block:
- name: Find gunicorn process
shell: "ps -C gunicorn fch -o pid | head -n 1"
register: psaux
- name: Reload gunicorn
command: "kill -HUP {{ psaux.stdout }}"
# Only restart if there were no changes to the git repo or the static tag was applied
when: not pulled.changed or "'static' in ansible_playbook_tag"
rescue:
- name: Restart Docker Container
command: "docker restart newsblur_web"
tags:
- static
- name: Generate temporary HAProxy config with backup server
template:
src: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.j2
dest: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
when: disable_consul_services_ie_staging is not defined
vars:
maintenance_mode: true
- name: Generate temporary HAProxy config with backup server
template:
src: /srv/newsblur/docker/haproxy/haproxy.staging.cfg
dest: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
when: disable_consul_services_ie_staging is defined
vars:
maintenance_mode: true
- name: Verify new HAProxy config
shell: docker exec haproxy haproxy -c -f /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
register: haproxy_verified
failed_when: haproxy_verified.rc != 0
- name: Replace active HAProxy config
command: mv /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new /srv/newsblur/docker/haproxy/haproxy.consul.cfg
- name: Reload HAProxy configuration
shell: "docker kill -s HUP haproxy"
- name: Wait for HAProxy to start using new configuration
pause:
seconds: 2
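
The steps above form a validate-before-swap pattern: the candidate config is checked inside the container, only then moved over the live file, and HAProxy is signaled to reload. By hand, with the same paths:

    # -c makes haproxy parse and validate the config without starting
    docker exec haproxy haproxy -c -f /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
    # mv is atomic on the same filesystem, so HAProxy never sees a half-written file
    mv /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new /srv/newsblur/docker/haproxy/haproxy.consul.cfg
    # SIGHUP triggers a graceful reload; the fixed 2s pause above is the
    # "wait for haproxy to settle" the commit message mentions
    docker kill -s HUP haproxy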
- name: Stop primary container
docker_container:
name: newsblur_web
state: stopped
container_default_behavior: no_defaults
- name: Deploy new container
docker_container:
name: newsblur_web
image: newsblur/newsblur_python3
container_default_behavior: no_defaults
pull: yes
env:
DOCKERBUILD: ""
state: started
command: gunicorn --config /srv/newsblur/config/gunicorn_conf.py newsblur_web.wsgi:application
hostname: "{{ inventory_hostname }}"
log_driver: json-file
log_options:
max-size: 100m
networks_cli_compatible: yes
network_mode: default
networks:
- name: newsblurnet
ports:
- "8000:8000"
restart_policy: unless-stopped
user: 1000:1001
volumes:
- /srv/newsblur:/srv/newsblur
- name: Wait for new container to be healthy
uri:
url: "http://localhost:8000/_haproxychk"
status_code: 200
register: result_8000
retries: 10
delay: 5
until: result_8000.status == 200
- name: Update HAProxy config to use new server
delegate_to: "{{ haproxy_host }}"
block:
- name: Generate temporary HAProxy config with new server
template:
src: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.j2
dest: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
when: disable_consul_services_ie_staging is not defined
vars:
maintenance_mode: false
- name: Generate temporary HAProxy config with new server
template:
src: /srv/newsblur/docker/haproxy/haproxy.staging.cfg
dest: /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
when: disable_consul_services_ie_staging is defined
vars:
maintenance_mode: false
- name: Verify new HAProxy config
shell: docker exec haproxy haproxy -c -f /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new
register: haproxy_verified
failed_when: haproxy_verified.rc != 0
- name: Replace active HAProxy config
command: mv /srv/newsblur/docker/haproxy/haproxy.consul.cfg.new /srv/newsblur/docker/haproxy/haproxy.consul.cfg
- name: Reload HAProxy configuration
shell: "docker kill -s HUP haproxy"
- name: Stop and remove backup container
docker_container:
name: newsblur_web_backup
state: absent
container_default_behavior: no_defaults
- name: Start Consul
become: yes

View file

@@ -54,7 +54,7 @@
- /etc/hosts:/etc/hosts
with_items:
- container_name: "task-celery"
command: "celery worker -A newsblur_web --loglevel=INFO -Q new_feeds,push_feeds,update_feeds,search_indexer,discover_indexer"
command: "celery worker -A newsblur_web --loglevel=INFO -Q discover_indexer,search_indexer,new_feeds,push_feeds,update_feeds"
when: "{{ inventory_hostname == 'htask-celery-10' }}"
- container_name: "task-celery"
command: "celery worker -A newsblur_web --loglevel=INFO -Q new_feeds,push_feeds,update_feeds,search_indexer"

View file

@@ -1,5 +1,4 @@
---
- name: Allow haproxy port 80
become: yes
ufw: rule=allow port=80
@@ -44,7 +43,7 @@
- name: Merge facts
set_fact:
updated_config: "{{ updated_config_consul.changed or updated_config_staging.changed }}"
- name: Verify HAProxy config
become: yes
shell: docker exec haproxy haproxy -c -f /srv/newsblur/docker/haproxy/haproxy.consul.cfg
@@ -87,8 +86,14 @@
- /etc/letsencrypt:/etc/letsencrypt
- name: Reload haproxy
debug:
msg: Gracefully reloading HAProxy
when: updated_config and haproxy_verified.rc == 0 and not docker_container.changed
changed_when: yes
notify: reload haproxy
- name: Install socat
apt:
name: socat
state: present
become: yes
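
socat is presumably being installed so the HAProxy admin socket (the `stats socket /var/run/haproxy.sock mode 660 level admin` line added to the staging config below) can be scripted; typical interactions look like:

    # dump per-backend/server health and counters
    echo "show stat" | socat stdio /var/run/haproxy.sock
    # e.g. drain a server at runtime without touching the config
    # (backend/server names as in the staging config below)
    echo "set server app_django/app_django state maint" | socat stdio /var/run/haproxy.sock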

View file

@@ -131,3 +131,7 @@ def redis_check(request):
if key:
return HttpResponse(str(key))
assert False, "Cannot read from redis-%s database" % pool
def health_check(request):
return HttpResponse("OK")

View file

@@ -12,6 +12,7 @@ GIGS_OF_MEMORY = psutil.virtual_memory().total / 1024 / 1024 / 1024.0
NUM_CPUS = psutil.cpu_count()
bind = "0.0.0.0:8000"
pidfile = "/srv/newsblur/logs/gunicorn.pid"
logfile = "/srv/newsblur/logs/production.log"
accesslog = "/srv/newsblur/logs/production.log"
@@ -25,7 +26,7 @@ forwarded_allow_ips = "*"
limit_request_line = 16000
limit_request_fields = 1000
worker_tmp_dir = "/dev/shm"
reload = True
reload = False
workers = max(int(math.floor(GIGS_OF_MEMORY * 2)), 3)
@@ -34,6 +35,7 @@ if workers > 16:
if os.environ.get("DOCKERBUILD", False):
workers = 2
reload = True
# If hostname has staging in it, only 2 workers
if app_env and "staging" in getattr(app_env, "SERVER_NAME", ""):
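
The sizing logic above keys worker count off RAM; a quick check of what the formula yields on a given box (the 7.8 GB figure is an arbitrary example, not a NewsBlur value):

    # e.g. 7.8 GB RAM -> max(floor(7.8*2), 3) = 15 workers; >16 is capped; DOCKERBUILD forces 2
    python3 -c 'import math; gigs = 7.8; print(max(int(math.floor(gigs * 2)), 3))'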

View file

@@ -25,11 +25,11 @@ defaults
option httplog
option redispatch
option abortonclose
retries 2
timeout connect 10s
timeout client 10s
timeout server 30s
timeout tunnel 1h
retries 3
timeout server 10s
timeout queue 10s
errorfile 502 /srv/newsblur/templates/502.http
errorfile 503 /srv/newsblur/templates/502.http
errorfile 504 /srv/newsblur/templates/502.http
@@ -104,26 +104,38 @@ backend nginx
backend app_django
option httpchk GET /_haproxychk
default-server check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
default-server check inter 1000ms on-error mark-down fall 2 rise 2 resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
{% for host in groups.hdjango %}
server {{host}} {{host}}.node.nyc1.consul:8000
{% if maintenance_mode is defined and maintenance_mode %}
server {{host}} {{host}}.node.nyc1.consul:8001 backup
{% else %}
server {{host}} {{host}}.node.nyc1.consul:8000
{% endif %}
{% endfor %}
backend app_count
balance roundrobin
option httpchk GET /_haproxychk
default-server check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
default-server check inter 1000ms on-error mark-down fall 2 rise 2 resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
{% for host in groups.hcount %}
server {{host}} {{host}}.node.nyc1.consul:8000
{% if maintenance_mode is defined and maintenance_mode %}
server {{host}} {{host}}.node.nyc1.consul:8001 backup
{% else %}
server {{host}} {{host}}.node.nyc1.consul:8000
{% endif %}
{% endfor %}
# server-template app-counts 1 _app-counts._tcp.service.nyc1.consul:8000 check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
backend app_refresh
balance roundrobin
option httpchk GET /_haproxychk
default-server check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
default-server check inter 2000ms on-error mark-down fall 2 rise 2 resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
{% for host in groups.hrefresh %}
server {{host}} {{host}}.node.nyc1.consul:8000
{% if maintenance_mode is defined and maintenance_mode %}
server {{host}} {{host}}.node.nyc1.consul:8001 backup
{% else %}
server {{host}} {{host}}.node.nyc1.consul:8000
{% endif %}
{% endfor %}
# server-template app-refresh 1 _app-refresh._tcp.service.nyc1.consul:8000 check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
@@ -132,7 +144,11 @@ backend app_push
option httpchk GET /_haproxychk
default-server check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
{% for host in groups.hpush %}
server {{host}} {{host}}.node.nyc1.consul:8000
{% if maintenance_mode is defined and maintenance_mode %}
server {{host}} {{host}}.node.nyc1.consul:8001 backup
{% else %}
server {{host}} {{host}}.node.nyc1.consul:8000
{% endif %}
{% endfor %}
# server-template app-push 1 _app-push._tcp.service.nyc1.consul:8000 check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
@@ -182,7 +198,11 @@ backend staging
option httpchk GET /_haproxychk
default-server check inter 2000ms resolvers consul resolve-prefer ipv4 resolve-opts allow-dup-ip init-addr none
{% for host in groups.staging %}
server {{host}} {{host}}.node.nyc1.consul:8000
{% if maintenance_mode is defined and maintenance_mode %}
server {{host}} {{host}}.node.nyc1.consul:8001 backup
{% else %}
server {{host}} {{host}}.node.nyc1.consul:8000
{% endif %}
{% endfor %}
backend blog
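
A quick way to verify which branch of the template rendered is to inspect the generated config; while maintenance_mode is true every pool should list the :8001 backup servers (the hostname in the expected output is hypothetical):

    docker exec haproxy grep -A 5 "backend app_django" /srv/newsblur/docker/haproxy/haproxy.consul.cfg
    # expected during the swap, e.g. for a hypothetical host happ1:
    #   server happ1 happ1.node.nyc1.consul:8001 backup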

View file

@@ -8,6 +8,7 @@ global
tune.ssl.default-dh-param 2048
log 127.0.0.1 local0 notice
# log 127.0.0.1 local1 info
stats socket /var/run/haproxy.sock mode 660 level admin
defaults
log global
@@ -112,17 +113,29 @@ backend nginx
backend app_django_counts
balance roundrobin
option httpchk GET /_haproxychk
{% if maintenance_mode is not defined or not maintenance_mode %}
server app_django 127.0.0.1:8000 check inter 3000ms
{% else %}
server app_django_backup 127.0.0.1:8001 check inter 3000ms backup
{% endif %}
backend app_django_refresh
balance roundrobin
option httpchk GET /_haproxychk
{% if maintenance_mode is not defined or not maintenance_mode %}
server app_django 127.0.0.1:8000 check inter 3000ms
{% else %}
server app_django_backup 127.0.0.1:8001 check inter 3000ms backup
{% endif %}
backend app_django
balance roundrobin
option httpchk GET /_haproxychk
{% if maintenance_mode is not defined or not maintenance_mode %}
server app_django 127.0.0.1:8000 check inter 3000ms
{% else %}
server app_django_backup 127.0.0.1:8001 check inter 3000ms backup
{% endif %}
backend maintenance
option httpchk HEAD /maintenance

View file

@@ -84,6 +84,7 @@ urlpatterns = [
url(r"^account/ifttt/v1/", include("apps.oauth.urls")),
url(r"^account/", include("oauth2_provider.urls", namespace="oauth2_provider")),
url(r"^monitor/", include("apps.monitor.urls"), name="monitor"),
url(r"^health-check/?", static_views.health_check, name="health-check"),
url("", include("django_prometheus.urls")),
]
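
With the route registered above, the new view can be exercised directly; the trailing `/?` in the pattern makes the slash optional:

    curl -s http://localhost:8000/health-check   # -> OK
    # the deploy itself keeps using the existing HAProxy probe:
    curl -s http://localhost:8000/_haproxychk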