Revert "Revert "Merge branch 'add_grafana_monitors'""

This reverts commit a7f5025a16.
This commit is contained in:
Samuel Clay 2021-09-07 11:36:11 -04:00 committed by Jonathan Math
parent 202a05ae84
commit 3e23b6394a
16 changed files with 18631 additions and 15819 deletions

View file

@ -1,6 +1,7 @@
SHELL := /bin/bash
CURRENT_UID := $(shell id -u)
CURRENT_GID := $(shell id -g)
newsblur := $(shell docker ps -qf "name=newsblur_web")
.PHONY: node
@ -35,8 +36,7 @@ bash:
- RUNWITHMAKEBUILD=True CURRENT_UID=${CURRENT_UID} CURRENT_GID=${CURRENT_GID} docker-compose exec newsblur_web bash
# allows user to exec into newsblur_web and use pdb.
debug:
- newsblur := $(shell docker ps -qf "name=newsblur_web")
- CURRENT_UID=${CURRENT_UID} CURRENT_GID=${CURRENT_GID} docker attach ${newsblur}
- RUNWITHMAKEBUILD=True CURRENT_UID=${CURRENT_UID} CURRENT_GID=${CURRENT_GID} docker attach ${newsblur}
log:
- RUNWITHMAKEBUILD=True docker-compose logs -f --tail 20 newsblur_web newsblur_node
logweb: log

View file

@ -2,7 +2,8 @@ from django.conf.urls import url
from apps.monitor.views import ( AppServers, AppTimes,
Classifiers, DbTimes, Errors, FeedCounts, Feeds, LoadTimes,
Stories, TasksCodes, TasksPipeline, TasksServers, TasksTimes,
Updates, Users
Updates, Users, MongoDBHeapUsage, MongoDBObjects, MongoDBOpsReplsetLag,
MongoDBSize, MongoDBOps, MongoDBPageFaults, MongoDBPageQueues
)
urlpatterns = [
url(r'^app-servers?$', AppServers.as_view(), name="app_servers"),
@ -20,4 +21,11 @@ urlpatterns = [
url(r'^task-times?$', TasksTimes.as_view(), name="task_times"),
url(r'^updates?$', Updates.as_view(), name="updates"),
url(r'^users?$', Users.as_view(), name="users"),
url(r'^mongo-heap-usage?$', MongoDBHeapUsage.as_view(), name="mongo_heap_usage"),
url(r'^mongo-objects?$', MongoDBObjects.as_view(), name="mongo_db_objects"),
url(r'^mongo-replset-lag?$', MongoDBOpsReplsetLag.as_view(), name="mongo_ops_replset_lag"),
url(r'^mongo-size?$', MongoDBSize.as_view(), name="mongo_size"),
url(r'^mongo-ops?$', MongoDBOps.as_view(), name="mongo_ops"),
url(r'^mongo-page-faults?$', MongoDBPageFaults.as_view(), name="mongo_page_faults"),
url(r'^mongo-page-queues?$', MongoDBPageQueues.as_view(), name="mongo_page_queues"),
]

View file

@ -13,3 +13,5 @@ from apps.monitor.views.newsblur_tasks_servers import TasksServers
from apps.monitor.views.newsblur_tasks_times import TasksTimes
from apps.monitor.views.newsblur_updates import Updates
from apps.monitor.views.newsblur_users import Users
from apps.monitor.views.prometheus_mongo import MongoDBHeapUsage, MongoDBObjects, MongoDBOpsReplsetLag, MongoDBSize, MongoDBOps, MongoDBPageFaults, MongoDBPageQueues

View file

@ -0,0 +1,191 @@
import os
from django.views import View
from django.shortcuts import render
class MongoGrafanaMetric(View):
    """Base Django view that renders MongoDB statistics as plain text.

    Subclasses implement ``get_context`` and return the template context
    (``data``, ``chart_name``, ``chart_type``) that is rendered with
    ``monitor/prometheus_data.html``.

    The server address is read from the ``MONGODB_SERVER`` environment
    variable (``host`` or ``host:port``); the database name is read from
    ``MONGODB_DATABASE``.
    """

    DEFAULT_SERVER = 'db_mongo:29019'
    DEFAULT_PORT = 27017

    # Clients shared by every instance, keyed by (host, port).  Django builds
    # a fresh view instance for each request, so caching the client on the
    # instance would open (and never close) a new MongoClient per scrape.
    _clients = {}

    def __init__(self, **kwargs):
        # Call View.__init__ (not object.__init__) and forward kwargs so that
        # as_view(**initkwargs) keeps working for this view and subclasses.
        super().__init__(**kwargs)
        self.dbname = os.environ.get('MONGODB_DATABASE')

        address = self.host
        if ':' in address:
            # Split on the last colon only, so an address that itself
            # contains colons does not break tuple unpacking.
            hostname, port = address.rsplit(':', 1)
            port = int(port)
        else:
            hostname, port = address, self.DEFAULT_PORT
        self.server = (hostname, port)

    @property
    def connection(self):
        """Return the (lazily created) MongoClient for ``self.server``."""
        client = self._clients.get(self.server)
        if client is None:
            # Imported lazily so the module can be loaded without pymongo.
            import pymongo
            client = pymongo.MongoClient(self.server[0], self.server[1])
            self._clients[self.server] = client
        return client

    @property
    def host(self):
        """Raw ``MONGODB_SERVER`` value, used as the ``db`` label."""
        return os.environ.get('MONGODB_SERVER') or self.DEFAULT_SERVER

    def autoconf(self):
        """Return the truthiness of the client object."""
        return bool(self.connection)

    def get_context(self):
        """Build the template context; must be overridden by subclasses."""
        raise NotImplementedError('You must implement the get_context function')

    def get(self, request):
        """Render the subclass' context as a ``text/plain`` response."""
        context = self.get_context()
        return render(request, 'monitor/prometheus_data.html', context, content_type="text/plain")
class MongoDBHeapUsage(MongoGrafanaMetric):
    """Expose ``extra_info.heap_usage_bytes`` from ``serverStatus``."""

    def get_context(self):
        """Return the template context for the heap usage gauge.

        The value is rendered as a ``name{labels} value`` sample line, like
        the other Mongo metric views in this module.
        """
        status = self.connection.admin.command('serverStatus')
        try:
            value = status['extra_info']['heap_usage_bytes']
        except KeyError:
            # The original author notes this key is missing on their server.
            # "U" (munin's "unknown") is not a valid Prometheus sample value;
            # NaN is the exposition format's way to say "no measurement".
            value = "NaN"

        formatted_data = {
            'heap_usage_bytes': f'mongo_heap_usage{{db="{self.host}"}} {value}',
        }
        return {
            "data": formatted_data,
            "chart_name": 'heap_usage',
            "chart_type": 'gauge',
        }
class MongoDBObjects(MongoGrafanaMetric):
    """Expose the ``objects`` figure reported by ``dbstats`` on ``newsblur``."""

    def get_context(self):
        """Return the template context for the object-count gauge."""
        db_stats = self.connection.newsblur.command("dbstats")
        samples = {'objects': db_stats['objects']}

        rendered = {
            name: f'mongo_objects{{db="{self.host}"}} {count}'
            for name, count in samples.items()
        }
        context = {
            "data": rendered,
            "chart_name": 'objects',
            "chart_type": 'gauge',
        }
        return context
class MongoDBOpsReplsetLag(MongoGrafanaMetric):
    """Expose the oplog time span and the maximum secondary replication lag.

    NOTE: both queries need mongod to run with ``--replSet``.  The original
    author reports that with that flag in the docker command newsblur_web
    cannot connect to mongo, so this view may fail in the local setup.
    """

    PRIMARY_STATE = 1
    SECONDARY_STATE = 2

    def _get_oplog_length(self):
        """Return ``ts.time`` of the newest oplog entry minus the oldest.

        Raises IndexError when ``local.oplog.rs`` has no entries.
        """
        oplog = self.connection.local.oplog.rs
        last_op = oplog.find({}, {'ts': 1}).sort([('$natural', -1)]).limit(1)[0]['ts'].time
        first_op = oplog.find({}, {'ts': 1}).sort([('$natural', 1)]).limit(1)[0]['ts'].time
        return last_op - first_op

    def _get_max_replication_lag(self):
        """Return the primary optime minus the oldest secondary optime.

        Raises Exception when ``replSetGetStatus`` lists no primary or no
        secondary member.
        """
        status = self.connection.admin.command('replSetGetStatus')

        primary_optime = None
        oldest_secondary_optime = None
        for member in status['members']:
            member_state = member['state']
            optime = member['optime']['ts'].time
            if member_state == self.PRIMARY_STATE:
                primary_optime = optime
            elif member_state == self.SECONDARY_STATE:
                # Compare against None explicitly so a legitimate optime is
                # never mistaken for "not seen yet".
                if oldest_secondary_optime is None or optime < oldest_secondary_optime:
                    oldest_secondary_optime = optime

        if primary_optime is None or oldest_secondary_optime is None:
            raise Exception("Replica set is not healthy")

        return primary_optime - oldest_secondary_optime

    def get_context(self):
        """Return the template context for the oplog gauges.

        Both helpers return plain numbers, so each one becomes exactly one
        sample line (the previous code called ``.items()`` on them, which
        raised AttributeError).
        """
        oplog_length = self._get_oplog_length()
        replication_lag = self._get_max_replication_lag()

        host = self.host
        formatted_data = {
            'length': f'mongo_oplog{{type="length", db="{host}"}} {oplog_length}',
            'lag': f'mongo_oplog{{type="lag", db="{host}"}} {replication_lag}',
        }
        return {
            "data": formatted_data,
            "chart_name": 'oplog_metrics',
            "chart_type": 'gauge',
        }
class MongoDBSize(MongoGrafanaMetric):
    """Expose the ``fsUsedSize`` figure reported by ``dbstats`` on ``newsblur``."""

    def get_context(self):
        """Return the template context for the database size gauge."""
        db_stats = self.connection.newsblur.command("dbstats")
        samples = {'size': db_stats['fsUsedSize']}

        rendered = {
            name: f'mongo_db_size{{db="{self.host}"}} {used}'
            for name, used in samples.items()
        }
        context = {
            "data": rendered,
            "chart_name": 'db_size_bytes',
            "chart_type": 'gauge',
        }
        return context
class MongoDBOps(MongoGrafanaMetric):
    """Expose every entry of ``serverStatus.opcounters``, one sample per type."""

    def get_context(self):
        """Return the template context for the operation counters."""
        server_status = self.connection.admin.command('serverStatus')
        opcounters = {
            op_type: server_status["opcounters"][op_type]
            for op_type in server_status['opcounters']
        }

        rendered = {}
        for op_type, count in opcounters.items():
            line = f'mongo_ops{{type="{op_type}", db="{self.host}"}} {count}'
            rendered[op_type] = line

        context = {
            "data": rendered,
            "chart_name": 'ops',
            "chart_type": 'gauge',
        }
        return context
class MongoDBPageFaults(MongoGrafanaMetric):
    """Expose ``extra_info.page_faults`` from ``serverStatus``."""

    def get_context(self):
        """Return the template context for the page-fault counter."""
        status = self.connection.admin.command('serverStatus')
        try:
            value = status['extra_info']['page_faults']
        except KeyError:
            # "U" (munin's "unknown") is not a valid Prometheus sample value;
            # NaN is the exposition format's way to say "no measurement".
            value = "NaN"

        formatted_data = {
            'page_faults': f'mongo_page_faults{{db="{self.host}"}} {value}',
        }
        return {
            "data": formatted_data,
            "chart_name": 'page_faults',
            "chart_type": 'counter',
        }
class MongoDBPageQueues(MongoGrafanaMetric):
    """Expose the ``readers`` and ``writers`` values of ``globalLock.currentQueue``."""

    def get_context(self):
        """Return the template context for the queue gauges."""
        server_status = self.connection.admin.command('serverStatus')
        queue_depths = {}
        for queue_type in ("readers", "writers"):
            queue_depths[queue_type] = server_status["globalLock"]["currentQueue"][queue_type]

        rendered = {
            queue_type: f'mongo_page_queues{{type="{queue_type}", db="{self.host}"}} {depth}'
            for queue_type, depth in queue_depths.items()
        }
        context = {
            "data": rendered,
            "chart_name": 'queues',
            "chart_type": 'gauge',
        }
        return context

View file

@ -0,0 +1,83 @@
import os
import socket
from django.views import View
from django.shortcuts import render
"""
RedisActiveConnections
RedisCommands
RedisConnects
RedisUsedMemory
RedisSize
"""
class RedisGrafanaMetric(View):
    """Base Django view that reads Redis ``INFO`` statistics.

    The server is taken from the ``REDIS_HOST`` / ``REDIS_PORT`` environment
    variables; a ``REDIS_HOST`` starting with ``/`` is treated as a UNIX
    socket path.  Subclasses implement ``get_fields`` and ``get_context``.
    """

    category = "Redis"

    def autoconf(self):
        """Return True when ``INFO`` can be fetched, False on socket errors."""
        try:
            self.get_info()
        except socket.error:
            return False
        return True

    def get_info(self):
        """Send ``INFO`` to Redis and return its ``key:value`` lines as a dict.

        Raises:
            ConnectionError: the server closed the connection before the
                reply header arrived.
            Exception: the reply is not a bulk string ("Protocol error").
        """
        host = os.environ.get('REDIS_HOST') or '127.0.0.1'
        port = int(os.environ.get('REDIS_PORT') or '6379')
        if host.startswith('/'):
            family, address = socket.AF_UNIX, host
        else:
            family, address = socket.AF_INET, (host, port)

        # The context manager closes the socket on every exit path.
        with socket.socket(family, socket.SOCK_STREAM) as s:
            s.connect(address)
            # Sockets carry bytes on Python 3; sendall also handles short writes.
            s.sendall(b"*1\r\n$4\r\ninfo\r\n")

            buf = b""
            while b'\r\n' not in buf:
                chunk = s.recv(1024)
                if not chunk:
                    # Empty read means the peer closed; avoid spinning forever.
                    raise ConnectionError("Redis closed the connection before replying")
                buf += chunk

            header, buf = buf.split(b'\r\n', 1)
            if not header.startswith(b"$"):
                raise Exception("Protocol error")

            # recv may return fewer bytes than requested, so keep reading
            # until the announced payload length has arrived.
            remaining = int(header[1:]) - len(buf)
            while remaining > 0:
                chunk = s.recv(remaining)
                if not chunk:
                    break
                buf += chunk
                remaining -= len(chunk)

        text = buf.decode('utf-8', 'replace')
        return dict(x.split(':', 1) for x in text.split('\r\n') if ':' in x)

    def execute(self):
        """Return ``{field: value}`` for every field from ``get_fields``.

        Fields missing from the ``INFO`` reply are reported as ``"U"``.
        """
        stats = self.get_info()
        return {k: stats.get(k, "U") for k, _ in self.get_fields()}

    def get_fields(self):
        """Return ``(info_key, options)`` pairs; must be overridden."""
        raise NotImplementedError('You must implement the get_fields function')

    def get_context(self):
        """Build the template context; must be overridden by subclasses."""
        raise NotImplementedError('You must implement the get_context function')

    def get(self, request):
        """Render the subclass' context as a ``text/plain`` response."""
        context = self.get_context()
        return render(request, 'monitor/prometheus_data.html', context, content_type="text/plain")
class RedisActiveConnection(RedisGrafanaMetric):
    """Field definition for the ``connected_clients`` value of Redis ``INFO``."""

    def get_fields(self):
        """Return the ``(info_key, options)`` pairs read by ``execute``."""
        # Defined as a real method: previously it sat under a first, bodiless
        # ``get_context`` definition, so ``execute`` could never reach it.
        return (
            ('connected_clients', dict(
                label="connections",
                info="connections",
                type="GAUGE",
            )),
        )

    def get_context(self):
        """Not implemented yet; requesting this view raises NotImplementedError."""
        # TODO: render self.execute() into the template context once the
        # output format for Redis metrics is decided.
        raise NotImplementedError('You must implement the get_context function')

View file

@ -15,14 +15,6 @@
"name": "NewsBlur Tests"
}
},
{
"pk": 3,
"model": "sites.site",
"fields": {
"domain": "nb.local.com",
"name": "NewsBlur"
}
},
{
"pk": 1,
"model": "auth.user",

View file

@ -120,7 +120,7 @@ services:
db_mongo:
container_name: db_mongo
image: mongo:4.0
image: mongo:3.6
restart: unless-stopped
ports:
- 29019:29019

View file

@ -13,3 +13,9 @@ providers:
options:
path: /etc/grafana/provisioning/dashboards/node_exporter_dashboard.json
foldersFromFilesStructure: true
- name: MongoDB
allowUiUpdates: true
type: file
options:
path: /etc/grafana/provisioning/dashboards/mongo_dashboard.json
foldersFromFilesStructure: true

File diff suppressed because it is too large Load diff

View file

@ -18,9 +18,22 @@
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"graphTooltip": 1,
"id": 4,
"links": [],
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": false,
"keepTime": false,
"tags": [],
"targetBlank": false,
"title": "",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"collapsed": false,
@ -130,7 +143,7 @@
"label": null,
"logBase": 1,
"max": null,
"min": null,
"min": "0",
"show": true
},
{
@ -286,7 +299,9 @@
"decimals": 0,
"description": "Page Loads Per App Server",
"fieldConfig": {
"defaults": {},
"defaults": {
"unit": "short"
},
"overrides": []
},
"fill": 10,
@ -330,7 +345,9 @@
{
"exemplar": true,
"expr": "app_servers",
"format": "time_series",
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{ app_server }}",
"queryType": "randomWalk",
"refId": "A"
@ -346,6 +363,7 @@
"sort": 0,
"value_type": "individual"
},
"transformations": [],
"type": "graph",
"xaxis": {
"buckets": null,
@ -361,7 +379,7 @@
"label": null,
"logBase": 1,
"max": null,
"min": null,
"min": "0",
"show": true
},
{
@ -435,6 +453,7 @@
"exemplar": true,
"expr": "app_times",
"interval": "",
"intervalFactor": 10,
"legendFormat": "{{app_server}}",
"queryType": "randomWalk",
"refId": "A"
@ -461,11 +480,11 @@
"yaxes": [
{
"$$hashKey": "object:799",
"decimals": 1,
"decimals": null,
"format": "s",
"label": null,
"logBase": 1,
"max": "1",
"max": null,
"min": "0",
"show": true
},
@ -525,7 +544,7 @@
"overrides": []
},
"gridPos": {
"h": 11,
"h": 22,
"w": 24,
"x": 0,
"y": 35
@ -582,7 +601,7 @@
"h": 7,
"w": 24,
"x": 0,
"y": 46
"y": 57
},
"hiddenSeries": false,
"id": 47,
@ -615,8 +634,9 @@
"targets": [
{
"exemplar": true,
"expr": "rate(db_times[60m])",
"expr": "db_times",
"interval": "",
"intervalFactor": 5,
"legendFormat": "{{db}}",
"queryType": "randomWalk",
"refId": "A"
@ -686,7 +706,7 @@
"h": 7,
"w": 24,
"x": 0,
"y": 53
"y": 64
},
"hiddenSeries": false,
"id": 48,
@ -777,7 +797,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 60
"y": 71
},
"id": 42,
"panels": [],
@ -797,25 +817,27 @@
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"h": 9,
"w": 24,
"x": 0,
"y": 61
"y": 72
},
"hiddenSeries": false,
"id": 33,
"legend": {
"avg": false,
"current": false,
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
@ -899,18 +921,20 @@
"h": 8,
"w": 24,
"x": 0,
"y": 69
"y": 81
},
"hiddenSeries": false,
"id": 44,
"legend": {
"avg": false,
"current": false,
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
"values": true
},
"lines": true,
"linewidth": 1,
@ -982,6 +1006,7 @@
},
"yaxes": [
{
"$$hashKey": "object:169",
"format": "short",
"label": null,
"logBase": 1,
@ -990,6 +1015,7 @@
"show": true
},
{
"$$hashKey": "object:170",
"format": "short",
"label": null,
"logBase": 1,
@ -1023,7 +1049,7 @@
"h": 8,
"w": 24,
"x": 0,
"y": 77
"y": 89
},
"hiddenSeries": false,
"id": 30,
@ -1218,7 +1244,7 @@
"h": 8,
"w": 24,
"x": 0,
"y": 85
"y": 97
},
"hiddenSeries": false,
"id": 32,
@ -1319,7 +1345,7 @@
"h": 7,
"w": 24,
"x": 0,
"y": 93
"y": 105
},
"hiddenSeries": false,
"id": 28,
@ -1436,7 +1462,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 100
"y": 112
},
"hiddenSeries": false,
"id": 12,
@ -1469,6 +1495,7 @@
"exemplar": true,
"expr": "feed_success",
"interval": "",
"intervalFactor": 1,
"legendFormat": "feed success",
"queryType": "randomWalk",
"refId": "A"
@ -1533,7 +1560,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 100
"y": 112
},
"hiddenSeries": false,
"id": 26,
@ -1628,7 +1655,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 108
"y": 120
},
"hiddenSeries": false,
"id": 14,
@ -1714,7 +1741,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 116
"y": 128
},
"id": 16,
"panels": [
@ -1734,7 +1761,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 95
"y": 118
},
"hiddenSeries": false,
"id": 18,
@ -1829,7 +1856,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 95
"y": 118
},
"hiddenSeries": false,
"id": 24,
@ -1924,7 +1951,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 103
"y": 126
},
"hiddenSeries": false,
"id": 35,
@ -2019,7 +2046,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 103
"y": 126
},
"hiddenSeries": false,
"id": 8,
@ -2109,7 +2136,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 117
"y": 129
},
"id": 40,
"panels": [
@ -2196,5 +2223,5 @@
"timezone": "",
"title": "NewsBlur",
"uid": "T86VjXrG2",
"version": 19
}
"version": 27
}

File diff suppressed because it is too large Load diff

View file

@ -115,3 +115,53 @@ scrape_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/users
scheme: https
- job_name: 'mongo heap usage'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-heap-usage
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo objects'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-objects
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo repl set lag'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-replset-lag
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo size'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-size
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo ops'
static_configs:
- targets: ['{{ monitor_server}}']
metrics_path: /monitor/mongo-ops
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo page faults'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-page-faults
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo page queues'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-page-queues
scheme: https
tls_config:
insecure_skip_verify: true

View file

@ -123,3 +123,52 @@ scrape_configs:
tls_config:
insecure_skip_verify: true
#- job_name: 'mongo heap usage'
# static_configs:
# - targets: ['haproxy']
# metrics_path: /monitor/mongo-heap-usage
# scheme: https
# tls_config:
# insecure_skip_verify: true
- job_name: 'mongo objects'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-objects
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo repl set lag'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-replset-lag
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo size'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-size
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo ops'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-ops
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo page faults'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-page-faults
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo page queues'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-page-queues
scheme: https
tls_config:
insecure_skip_verify: true

View file

@ -170,8 +170,8 @@ DO_TOKEN_FABRIC = '0000000000000000000000000000000000000000000000000000000000000
SERVER_NAME = "nblocalhost"
NEWSBLUR_URL = os.getenv("NEWSBLUR_URL", "https://localhost")
if NEWSBLUR_URL == 'https://nb.local.com':
SESSION_COOKIE_DOMAIN = ".nb.local.com"
if NEWSBLUR_URL == 'https://localhost':
SESSION_COOKIE_DOMAIN = "localhost"
SESSION_ENGINE = 'redis_sessions.session'

View file

@ -161,7 +161,7 @@ DO_TOKEN_LOG = '0000000000000000000000000000000000000000000000000000000000000000
DO_TOKEN_FABRIC = '0000000000000000000000000000000000000000000000000000000000000000'
SERVER_NAME = "nblocalhost"
NEWSBLUR_URL = 'http://nb.local.com'
NEWSBLUR_URL = 'https://localhost'
SESSION_ENGINE = 'redis_sessions.session'

3
package-lock.json generated Normal file
View file

@ -0,0 +1,3 @@
{
"lockfileVersion": 1
}