diff --git a/docker/alertmanager/alertmanager.docker.yml b/docker/alertmanager/alertmanager.docker.yml
new file mode 100644
index 000000000..fc3834517
--- /dev/null
+++ b/docker/alertmanager/alertmanager.docker.yml
@@ -0,0 +1,20 @@
+# Alertmanager config for the Docker setup: alerts are grouped by alertname
+# and e-mailed to the single `email-me` receiver.
+# NOTE(review): Alertmanager does not expand ${VAR} placeholders itself;
+# presumably this file is rendered (e.g. envsubst) before use - confirm.
+route:
+  group_by: [alertname]
+  receiver: email-me
+  group_wait: 10s  # wait before sending the first notification for a new group
+  group_interval: 5m  # wait before notifying about new alerts added to a group
+  repeat_interval: 5m  # re-send a still-firing notification after this long
+
+receivers:
+- name: email-me
+  email_configs:
+  - to: "${ALERT_RECEIVER_EMAIL}"
+    from: "${FROM_EMAIL}"
+    smarthost: "${SMARTHOST}"
+    auth_username: "${AUTH_USERNAME}"
+    auth_identity: "${AUTH_IDENTITY}"
+    auth_password: "${AUTH_PASSWORD}"
diff --git a/docker/alertmanager/alertmanager.yml.j2 b/docker/alertmanager/alertmanager.yml.j2
new file mode 100644
index 000000000..8e6274cd7
--- /dev/null
+++ b/docker/alertmanager/alertmanager.yml.j2
@@ -0,0 +1,20 @@
+# Alertmanager config template: alerts are grouped by alertname and e-mailed
+# to the single `email-me` receiver.
+# Templated values are double-quoted so an empty or special-character value
+# still renders as a YAML string; escape any `"` or `\` in the inputs.
+route:
+  group_by: [alertname]
+  receiver: email-me
+  group_wait: 10s  # wait before sending the first notification for a new group
+  group_interval: 5m  # wait before notifying about new alerts added to a group
+  repeat_interval: 5m  # re-send a still-firing notification after this long
+
+receivers:
+- name: email-me
+  email_configs:
+  - to: "{{ alert_receiver_email }}"
+    from: "{{ from_email }}"
+    smarthost: "{{ smarthost }}"
+    auth_username: "{{ auth_username }}"
+    auth_identity: "{{ auth_identity }}"
+    auth_password: "{{ auth_password }}"
diff --git a/docker/prometheus/prometheus.consul.yml.j2 b/docker/prometheus/prometheus.consul.yml.j2
new file mode 100644
index 000000000..996bb2e2d
--- /dev/null
+++ b/docker/prometheus/prometheus.consul.yml.j2
@@ -0,0 +1,117 @@
+# Prometheus scrape config template for the Consul-based deployment.
+# Every application job below scrapes one /monitor/* endpoint over HTTPS with
+# certificate verification disabled (insecure_skip_verify).
+global:
+  scrape_interval: 300s
+  external_labels:
+    monitor: 'my-monitor'
+scrape_configs:
+  - job_name: 'node-exporter'
+    static_configs:
+      - targets: ['node-exporter:9100']  # TODO: replace this static target with consul_sd_configs
+
+  - job_name: 'app_servers'
+    static_configs:
+      - targets: ['haproxy.service.nyc1.consul']
+    metrics_path: /monitor/app-servers
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'app_times'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/app-times
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'classifiers'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/classifiers
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'db_times'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/db-times
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'errors'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/errors
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'feed_counts'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/feed-counts
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'feeds'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/feeds
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'load_times'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/load-times
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  # - job_name: 'stories'
+  #   static_configs:
+  #     - targets: ['{{ ansible_ssh_host }}']
+  #   metrics_path: /monitor/stories
+  #   scheme: https
+  #   tls_config:
+  #     insecure_skip_verify: true
+  - job_name: 'task_codes'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/task-codes
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'task_pipeline'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/task-pipeline
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'task_servers'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/task-servers
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'task_times'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/task-times
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'updates'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/updates
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'users'
+    static_configs:
+      - targets: ['{{ ansible_ssh_host }}']
+    metrics_path: /monitor/users
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
diff --git a/docker/prometheus/prometheus.docker.yml b/docker/prometheus/prometheus.docker.yml
new file mode 100644
index 000000000..7bed9191e
--- /dev/null
+++ b/docker/prometheus/prometheus.docker.yml
@@ -0,0 +1,128 @@
+# Prometheus config for the Docker setup: loads the alert rules, forwards
+# alerts to alertmanager:9093, and scrapes node-exporter plus each /monitor/*
+# endpoint on the `haproxy` target over HTTPS with certificate verification
+# disabled (insecure_skip_verify).
+global:
+  scrape_interval: 5s
+  external_labels:
+    monitor: 'my-monitor'
+
+rule_files:
+  - "/etc/prometheus/prometheus_rules.yml"
+
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets:
+            - 'alertmanager:9093'
+
+scrape_configs:
+  - job_name: 'node-exporter'
+    static_configs:
+      - targets: ['node-exporter:9100']
+
+  - job_name: 'app_servers'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/app-servers
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'app_times'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/app-times
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'classifiers'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/classifiers
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'db_times'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/db-times
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'errors'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/errors
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'feed_counts'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/feed-counts
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'feeds'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/feeds
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'load_times'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/load-times
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'stories'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/stories
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'task_codes'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/task-codes
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'task_pipeline'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/task-pipeline
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'task_servers'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/task-servers
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'task_times'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/task-times
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'updates'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/updates
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
+  - job_name: 'users'
+    static_configs:
+      - targets: ['haproxy']
+    metrics_path: /monitor/users
+    scheme: https
+    tls_config:
+      insecure_skip_verify: true
diff --git a/docker/prometheus/prometheus_rules.yml b/docker/prometheus/prometheus_rules.yml
new file mode 100644
index 000000000..016cb081a
--- /dev/null
+++ b/docker/prometheus/prometheus_rules.yml
@@ -0,0 +1,12 @@
+# Alerting rules for Prometheus: one warning-level alert on request latency.
+# NOTE(review): job:request_latency_seconds is not defined in this file;
+# confirm a recording rule or exporter actually produces that series.
+groups:
+- name: high request latency
+  rules:
+  - alert: HighRequestLatency
+    expr: job:request_latency_seconds > 5
+    labels:
+      severity: warning
+    annotations:
+      summary: Request latency greater than 5 seconds