Revert "Revert "Merge branch 'add_grafana_monitors'""

This reverts commit a7f5025a16.
This commit is contained in:
Samuel Clay 2021-09-07 11:36:11 -04:00 committed by Jonathan Math
parent 202a05ae84
commit 3e23b6394a
16 changed files with 18631 additions and 15819 deletions

View file

@ -1,6 +1,7 @@
SHELL := /bin/bash
CURRENT_UID := $(shell id -u)
CURRENT_GID := $(shell id -g)
newsblur := $(shell docker ps -qf "name=newsblur_web")
.PHONY: node
@ -35,8 +36,7 @@ bash:
- RUNWITHMAKEBUILD=True CURRENT_UID=${CURRENT_UID} CURRENT_GID=${CURRENT_GID} docker-compose exec newsblur_web bash
# allows user to exec into newsblur_web and use pdb.
debug:
- newsblur := $(shell docker ps -qf "name=newsblur_web")
- CURRENT_UID=${CURRENT_UID} CURRENT_GID=${CURRENT_GID} docker attach ${newsblur}
- RUNWITHMAKEBUILD=True CURRENT_UID=${CURRENT_UID} CURRENT_GID=${CURRENT_GID} docker attach ${newsblur}
log:
- RUNWITHMAKEBUILD=True docker-compose logs -f --tail 20 newsblur_web newsblur_node
logweb: log

View file

@ -2,7 +2,8 @@ from django.conf.urls import url
from apps.monitor.views import ( AppServers, AppTimes,
Classifiers, DbTimes, Errors, FeedCounts, Feeds, LoadTimes,
Stories, TasksCodes, TasksPipeline, TasksServers, TasksTimes,
Updates, Users
Updates, Users, MongoDBHeapUsage, MongoDBObjects, MongoDBOpsReplsetLag,
MongoDBSize, MongoDBOps, MongoDBPageFaults, MongoDBPageQueues
)
urlpatterns = [
url(r'^app-servers?$', AppServers.as_view(), name="app_servers"),
@ -20,4 +21,11 @@ urlpatterns = [
url(r'^task-times?$', TasksTimes.as_view(), name="task_times"),
url(r'^updates?$', Updates.as_view(), name="updates"),
url(r'^users?$', Users.as_view(), name="users"),
url(r'^mongo-heap-usage?$', MongoDBHeapUsage.as_view(), name="mongo_heap_usage"),
url(r'^mongo-objects?$', MongoDBObjects.as_view(), name="mongo_db_objects"),
url(r'^mongo-replset-lag?$', MongoDBOpsReplsetLag.as_view(), name="mongo_ops_replset_lag"),
url(r'^mongo-size?$', MongoDBSize.as_view(), name="mongo_size"),
url(r'^mongo-ops?$', MongoDBOps.as_view(), name="mongo_ops"),
url(r'^mongo-page-faults?$', MongoDBPageFaults.as_view(), name="mongo_page_faults"),
url(r'^mongo-page-queues?$', MongoDBPageQueues.as_view(), name="mongo_page_queues"),
]

View file

@ -12,4 +12,6 @@ from apps.monitor.views.newsblur_tasks_pipeline import TasksPipeline
from apps.monitor.views.newsblur_tasks_servers import TasksServers
from apps.monitor.views.newsblur_tasks_times import TasksTimes
from apps.monitor.views.newsblur_updates import Updates
from apps.monitor.views.newsblur_users import Users
from apps.monitor.views.newsblur_users import Users
from apps.monitor.views.prometheus_mongo import MongoDBHeapUsage, MongoDBObjects, MongoDBOpsReplsetLag, MongoDBSize, MongoDBOps, MongoDBPageFaults, MongoDBPageQueues

View file

@ -0,0 +1,191 @@
import os
from django.views import View
from django.shortcuts import render
class MongoGrafanaMetric(View):
    """Base view for MongoDB Grafana/Prometheus metrics.

    Subclasses implement get_context() and are rendered as plain-text
    prometheus exposition data through the shared template.
    """

    def __init__(self, **kwargs):
        # Bug fix: the original called super(View, self).__init__(), which
        # skips View.__init__ entirely (it resolves to object.__init__).
        # Django's View.__init__ stores the kwargs passed by as_view(),
        # so it must run.
        super().__init__(**kwargs)
        self.dbname = os.environ.get('MONGODB_DATABASE')
        host = os.environ.get('MONGODB_SERVER') or 'db_mongo:29019'
        if ':' in host:
            host, port = host.split(':')
            port = int(port)
        else:
            # Default mongod port when no explicit port is configured.
            port = 27017
        self.server = (host, port)

    @property
    def connection(self):
        # Lazily create (and cache) a single MongoClient per view instance.
        if not hasattr(self, '_connection'):
            import pymongo
            self._connection = pymongo.MongoClient(self.server[0], self.server[1])
        return self._connection

    @property
    def host(self):
        # Raw "host:port" string, used as the `db` label in metric lines.
        return os.environ.get('MONGODB_SERVER') or 'db_mongo:29019'

    def autoconf(self):
        # Munin-style autoconfiguration hook: truthy when a client exists.
        return bool(self.connection)

    def get_context(self):
        raise NotImplementedError('You must implement the get_context function')

    def get(self, request):
        # Render the metrics as text/plain for the Prometheus scraper.
        context = self.get_context()
        return render(request, 'monitor/prometheus_data.html', context, content_type="text/plain")
class MongoDBHeapUsage(MongoGrafanaMetric):
    """Exposes MongoDB's heap usage (bytes) as a prometheus gauge."""

    def get_context(self):
        status = self.connection.admin.command('serverStatus')
        try:
            value = status['extra_info']['heap_usage_bytes']
        except KeyError:
            # Not every storage engine / mongod build reports heap usage;
            # "U" is the munin convention for an unknown value.
            value = "U"
        # Bug fix: format as a prometheus exposition line, consistent with
        # every other metric view in this module -- the raw value alone is
        # not a valid sample line for the scraper.
        data = {
            'heap_usage_bytes': f'mongo_heap_usage{{db="{self.host}"}} {value}'
        }
        return {
            "data": data,
            "chart_name": 'heap_usage',
            "chart_type": 'gauge',
        }
class MongoDBObjects(MongoGrafanaMetric):
    """Reports the object count of the newsblur database as a gauge."""

    def get_context(self):
        db_stats = self.connection.newsblur.command("dbstats")
        formatted_data = {
            'objects': f'mongo_objects{{db="{self.host}"}} {db_stats["objects"]}'
        }
        return {
            "data": formatted_data,
            "chart_name": 'objects',
            "chart_type": 'gauge',
        }
class MongoDBOpsReplsetLag(MongoGrafanaMetric):
    """Exposes the oplog time span and maximum replica-set lag (seconds).

    NOTE(review): requires mongod to run with --replSet; without it
    replSetGetStatus fails (and per the original author's note, enabling
    --replSet locally broke newsblur_web's mongo connection -- TODO confirm).
    """

    def _get_oplog_length(self):
        # Seconds between the newest and oldest entries in the oplog.
        oplog = self.connection.local.oplog.rs
        last_op = oplog.find({}, {'ts': 1}).sort([('$natural', -1)]).limit(1)[0]['ts'].time
        first_op = oplog.find({}, {'ts': 1}).sort([('$natural', 1)]).limit(1)[0]['ts'].time
        return last_op - first_op

    def _get_max_replication_lag(self):
        # Lag (seconds) between the primary and the most-behind secondary.
        PRIMARY_STATE = 1
        SECONDARY_STATE = 2
        status = self.connection.admin.command('replSetGetStatus')
        primary_optime = None
        oldest_secondary_optime = None
        for member in status['members']:
            member_state = member['state']
            optime = member['optime']
            if member_state == PRIMARY_STATE:
                primary_optime = optime['ts'].time
            elif member_state == SECONDARY_STATE:
                if not oldest_secondary_optime or optime['ts'].time < oldest_secondary_optime:
                    oldest_secondary_optime = optime['ts'].time
        if not primary_optime or not oldest_secondary_optime:
            raise Exception("Replica set is not healthy")
        return primary_optime - oldest_secondary_optime

    def get_context(self):
        oplog_length = self._get_oplog_length()
        replication_lag = self._get_max_replication_lag()
        # Bug fix: both helpers return plain numbers, not dicts -- the
        # original iterated .items() on them, raising AttributeError.
        formatted_data = {
            'oplog_length': f'mongo_oplog{{type="length", db="{self.host}"}} {oplog_length}',
            'replication_lag': f'mongo_oplog{{type="lag", db="{self.host}"}} {replication_lag}',
        }
        return {
            "data": formatted_data,
            "chart_name": 'oplog_metrics',
            "chart_type": 'gauge',
        }
class MongoDBSize(MongoGrafanaMetric):
    """Reports filesystem space used by the newsblur database (bytes)."""

    def get_context(self):
        used_bytes = self.connection.newsblur.command("dbstats")['fsUsedSize']
        return {
            "data": {'size': f'mongo_db_size{{db="{self.host}"}} {used_bytes}'},
            "chart_name": 'db_size_bytes',
            "chart_type": 'gauge',
        }
class MongoDBOps(MongoGrafanaMetric):
    """Reports server-wide opcounters (insert, query, update, ...) as gauges."""

    def get_context(self):
        opcounters = self.connection.admin.command('serverStatus')['opcounters']
        label = self.host
        formatted_data = {
            op: f'mongo_ops{{type="{op}", db="{label}"}} {count}'
            for op, count in opcounters.items()
        }
        return {
            "data": formatted_data,
            "chart_name": 'ops',
            "chart_type": 'gauge',
        }
class MongoDBPageFaults(MongoGrafanaMetric):
    """Reports the cumulative page-fault count as a counter metric."""

    def get_context(self):
        server_status = self.connection.admin.command('serverStatus')
        # "U" is the munin convention for an unknown/unavailable value
        # (not every platform reports extra_info.page_faults).
        fault_count = server_status.get('extra_info', {}).get('page_faults', "U")
        formatted_data = {
            'page_faults': f'mongo_page_faults{{db="{self.host}"}} {fault_count}'
        }
        return {
            "data": formatted_data,
            "chart_name": 'page_faults',
            "chart_type": 'counter',
        }
class MongoDBPageQueues(MongoGrafanaMetric):
    """Reports the global-lock read/write queue depths as gauges."""

    def get_context(self):
        current_queue = self.connection.admin.command('serverStatus')["globalLock"]["currentQueue"]
        label = self.host
        formatted_data = {}
        for queue_name in ("readers", "writers"):
            depth = current_queue[queue_name]
            formatted_data[queue_name] = f'mongo_page_queues{{type="{queue_name}", db="{label}"}} {depth}'
        return {
            "data": formatted_data,
            "chart_name": 'queues',
            "chart_type": 'gauge',
        }

View file

@ -0,0 +1,83 @@
import os
import socket
from django.views import View
from django.shortcuts import render
"""
RedisActiveConnections
RedisCommands
RedisConnects
RedisUsedMemory
RedisSize
"""
class RedisGrafanaMetric(View):
    """Base view for Redis Grafana/Prometheus metrics.

    Speaks the Redis protocol (RESP) directly over a socket -- no redis-py
    dependency -- to fetch the INFO block, then lets subclasses pick
    fields out of it via get_fields().
    """
    category = "Redis"

    def autoconf(self):
        # Munin-style autoconfiguration hook: can we reach the server?
        try:
            self.get_info()
        except socket.error:
            return False
        return True

    def get_info(self):
        """Return the parsed INFO response as a {field: value} dict."""
        host = os.environ.get('REDIS_HOST') or '127.0.0.1'
        port = int(os.environ.get('REDIS_PORT') or '6379')
        if host.startswith('/'):
            # A leading slash means a unix domain socket path.
            s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
            s.connect(host)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.connect((host, port))
        # Bug fix: sockets speak bytes in Python 3 -- the original sent and
        # concatenated str, which raises TypeError on send()/'+='. The
        # protocol check below (l[0] on bytes yields an int) was broken too.
        s.send(b"*1\r\n$4\r\ninfo\r\n")
        buf = b""
        while b'\r\n' not in buf:
            buf += s.recv(1024)
        header, buf = buf.split(b'\r\n', 1)
        if not header.startswith(b"$"):
            s.close()
            raise Exception("Protocol error")
        # Bulk-string header carries the payload length; recv() may return
        # fewer bytes than requested, so loop until we have it all.
        remaining = int(header[1:]) - len(buf)
        while remaining > 0:
            chunk = s.recv(remaining)
            if not chunk:
                break
            buf += chunk
            remaining -= len(chunk)
        s.close()
        text = buf.decode('utf-8', 'replace')
        return dict(x.split(':', 1) for x in text.split('\r\n') if ':' in x)

    def execute(self):
        """Map each field from get_fields() to its INFO value ("U" if absent)."""
        stats = self.get_info()
        values = {}
        for field_name, _config in self.get_fields():
            try:
                value = stats[field_name]
            except KeyError:
                # "U" is the munin convention for an unknown value.
                value = "U"
            values[field_name] = value
        return values

    def get_fields(self):
        raise NotImplementedError('You must implement the get_fields function')

    def get_context(self):
        raise NotImplementedError('You must implement the get_context function')

    def get(self, request):
        # Render the metrics as text/plain for the Prometheus scraper.
        context = self.get_context()
        return render(request, 'monitor/prometheus_data.html', context, content_type="text/plain")
class RedisActiveConnection(RedisGrafanaMetric):
    """Exposes the number of currently connected Redis clients.

    Bug fix: the original accidentally nested get_fields() (and a second
    get_context() that raised NotImplementedError) inside get_context(),
    so the class never defined get_fields() and its rendered context was
    always None.
    """

    def get_fields(self):
        return (
            ('connected_clients', dict(
                label = "connections",
                info = "connections",
                type = "GAUGE",
            )),
        )

    def get_context(self):
        # Same context shape the mongo metric views use with this template.
        return {
            "data": self.execute(),
            "chart_name": 'connections',
            "chart_type": 'gauge',
        }

View file

@ -15,14 +15,6 @@
"name": "NewsBlur Tests"
}
},
{
"pk": 3,
"model": "sites.site",
"fields": {
"domain": "nb.local.com",
"name": "NewsBlur"
}
},
{
"pk": 1,
"model": "auth.user",

View file

@ -120,7 +120,7 @@ services:
db_mongo:
container_name: db_mongo
image: mongo:4.0
image: mongo:3.6
restart: unless-stopped
ports:
- 29019:29019

View file

@ -12,4 +12,10 @@ providers:
type: file
options:
path: /etc/grafana/provisioning/dashboards/node_exporter_dashboard.json
foldersFromFilesStructure: true
- name: MongoDB
allowUiUpdates: true
type: file
options:
path: /etc/grafana/provisioning/dashboards/mongo_dashboard.json
foldersFromFilesStructure: true

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -115,3 +115,53 @@ scrape_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/users
scheme: https
- job_name: 'mongo heap usage'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-heap-usage
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo objects'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-objects
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo repl set lag'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-replset-lag
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo size'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-size
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo ops'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-ops
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo page faults'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-page-faults
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo page queues'
static_configs:
- targets: ['{{ monitor_server }}']
metrics_path: /monitor/mongo-page-queues
scheme: https
tls_config:
insecure_skip_verify: true

View file

@ -122,4 +122,53 @@ scrape_configs:
scheme: https
tls_config:
insecure_skip_verify: true
#- job_name: 'mongo heap usage'
# static_configs:
# - targets: ['haproxy']
# metrics_path: /monitor/mongo-heap-usage
# scheme: https
# tls_config:
# insecure_skip_verify: true
- job_name: 'mongo objects'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-objects
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo repl set lag'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-replset-lag
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo size'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-size
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo ops'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-ops
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo page faults'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-page-faults
scheme: https
tls_config:
insecure_skip_verify: true
- job_name: 'mongo page queues'
static_configs:
- targets: ['haproxy']
metrics_path: /monitor/mongo-page-queues
scheme: https
tls_config:
insecure_skip_verify: true

View file

@ -170,8 +170,8 @@ DO_TOKEN_FABRIC = '0000000000000000000000000000000000000000000000000000000000000
SERVER_NAME = "nblocalhost"
NEWSBLUR_URL = os.getenv("NEWSBLUR_URL", "https://localhost")
if NEWSBLUR_URL == 'https://nb.local.com':
SESSION_COOKIE_DOMAIN = ".nb.local.com"
if NEWSBLUR_URL == 'https://localhost':
SESSION_COOKIE_DOMAIN = "localhost"
SESSION_ENGINE = 'redis_sessions.session'

View file

@ -161,7 +161,7 @@ DO_TOKEN_LOG = '0000000000000000000000000000000000000000000000000000000000000000
DO_TOKEN_FABRIC = '0000000000000000000000000000000000000000000000000000000000000000'
SERVER_NAME = "nblocalhost"
NEWSBLUR_URL = 'http://nb.local.com'
NEWSBLUR_URL = 'https://localhost'
SESSION_ENGINE = 'redis_sessions.session'

3
package-lock.json generated Normal file
View file

@ -0,0 +1,3 @@
{
"lockfileVersion": 1
}