A colleague needed to monitor the data growth of a particular collection in a MongoDB database. A scheduled job keeps inserting data into that collection, and if the job dies we want to notice quickly and restart it so the data stays complete. (Monitoring the scheduled job itself would also work; that is left for another time.)
As in the earlier post "Monitoring a MongoDB cluster with Ganglia", the monitoring is implemented as a Ganglia Python module, so without further explanation, here are the scripts.
1. Create the mongodb_collection.pyconf file (replace "collection" and the metric names with your actual collection name as appropriate)
# mongodb_collection.pyconf
modules {
  module {
    name = "mongodb_EPGInfo_www"  # EPGInfo is the database name, www is the collection name
    language = "python"
    param total_count {
      value = "/opt/mongodb/bin/mongo --host 10.9.201.190 --port 27017 EPGInfo --quiet --eval 'printjson(db.www.find().count())'"  # returns the number of documents in the www collection
    }
  }
}

collection_group {
  collect_every = 30
  time_threshold = 90
  metric {
    name = "www_count_total"
    title = "Total Count"
  }
  metric {
    name = "www_count_insert"
    title = "Insert per Sec"
  }
}
See the comments in the config above for details.
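Before wiring the query into gmond, it can help to confirm what the total_count command actually returns. The sketch below is not part of the original setup: it simply runs the same mongo shell one-liner from Python and parses its output the way the module in step 2 does, assuming the mongo binary path, host, and port from the config above.

# Standalone sanity check of the "total_count" query (same command as in the pyconf above).
import json
import re
import subprocess

CMD = ("/opt/mongodb/bin/mongo --host 10.9.201.190 --port 27017 EPGInfo "
       "--quiet --eval 'printjson(db.www.find().count())'")

out = subprocess.check_output(CMD, shell=True).decode().strip()
out = re.sub(r'\w+\((.*)\)', r'\1', out)  # unwrap NumberLong(...) if the shell prints it
print(json.loads(out))                    # plain document count of EPGInfo.www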
2. Create the mongodb_collection.py script
#!/usr/bin/env python
# Ganglia python module: report the total document count and insert rate
# for the "www" collection in the "EPGInfo" MongoDB database.

import os
import json
import time
import copy
import re

PARAMS = {
    "total_count": "/opt/mongodb/bin/mongo --host 10.9.201.190 --port 27017 EPGInfo --quiet --eval 'printjson(db.www.find().count())'"
}

NAME_PREFIX = 'www_'

METRICS = {
    'time': 0,
    'data': {}
}

LAST_METRICS = copy.deepcopy(METRICS)
METRICS_CACHE_TTL = 3


def get_metrics():
    """Return all metrics"""
    global METRICS, LAST_METRICS
    if (time.time() - METRICS['time']) > METRICS_CACHE_TTL:
        metrics = {}
        # get raw metric data
        o = os.popen(PARAMS['total_count'])
        # clean up
        metrics_str = ''.join(o.readlines()).strip()              # convert to a single string
        metrics_str = re.sub(r'\w+\((.*)\)', r'\1', metrics_str)  # strip wrappers such as NumberLong(...)

        metrics['count_total'] = float(json.loads(metrics_str))  # add the total count to the metrics dict
        metrics['count_insert'] = metrics['count_total']         # rate is derived from consecutive totals

        # update cache
        LAST_METRICS = copy.deepcopy(METRICS)
        METRICS = {
            'time': time.time(),
            'data': metrics
        }
    print(METRICS)  # debug output
    return [METRICS, LAST_METRICS]


def get_value(name):
    """Return a value for the requested metric"""
    # get metrics
    metrics = get_metrics()[0]
    print(metrics)  # debug output
    # get value
    name = name[len(NAME_PREFIX):]  # remove prefix from name
    try:
        result = metrics['data'][name]
    except Exception:
        result = 0
    return result


def get_rate(name):
    """Return change over time for the requested metric"""
    # get metrics
    [curr_metrics, last_metrics] = get_metrics()
    # get rate
    name = name[len(NAME_PREFIX):]  # remove prefix from name
    try:
        rate = float(curr_metrics['data'][name] - last_metrics['data'][name]) / \
               float(curr_metrics['time'] - last_metrics['time'])
        if rate < 0:
            rate = float(0)
    except Exception:
        rate = float(0)
    return rate


def get_total_count(name):
    total_count = get_value(name)
    return total_count


def get_count_insert(name):
    count_insert = get_rate(name)
    return count_insert


def metric_init(lparams):
    global PARAMS

    # set parameters
    for key in lparams:
        PARAMS[key] = lparams[key]

    # define descriptors
    time_max = 60
    groups = 'EPGInfo_www'
    descriptors = [
        {
            'name': NAME_PREFIX + 'count_total',
            'call_back': get_total_count,
            'time_max': time_max,
            'value_type': 'float',
            'units': 'Total',
            'slope': 'both',
            'format': '%f',
            'description': 'Total Count',
            'groups': groups
        },
        {
            'name': NAME_PREFIX + 'count_insert',
            'call_back': get_count_insert,
            'time_max': time_max,
            'value_type': 'float',
            'units': 'Inserts/Sec',
            'slope': 'both',
            'format': '%f',
            'description': 'Insert per Sec',
            'groups': groups
        }
    ]
    return descriptors


def metric_cleanup():
    pass


if __name__ == '__main__':
    descriptors = metric_init(PARAMS)
    while True:
        for d in descriptors:
            print(('%s = ' + d['format']) % (d['name'], d['call_back'](d['name'])))
        print('')
        time.sleep(METRICS_CACHE_TTL)
See the comments in the code above for details.
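A note on how the insert rate is derived: gmond calls get_count_insert periodically, and get_rate divides the difference between the current and previous cached totals by the elapsed time between the two polls. The following snippet is illustrative only (hypothetical numbers, not measured data):

# Illustrative only: how get_rate() turns two consecutive samples into Inserts/Sec.
curr = {'time': 1000.0, 'data': {'count_insert': 10300.0}}  # hypothetical current poll
last = {'time':  970.0, 'data': {'count_insert': 10000.0}}  # hypothetical poll 30 s earlier
rate = (curr['data']['count_insert'] - last['data']['count_insert']) / \
       (curr['time'] - last['time'])
print(rate)  # 10.0 documents inserted per second

To deploy, the .py file typically goes into gmond's python_modules directory and the .pyconf into its conf.d directory (exact paths depend on the installation), after which gmond is restarted so the new metrics appear in the Ganglia web frontend.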
Monitoring the growth of a MongoDB collection with Ganglia
Source: http://blog.chinaunix.net/uid-13869856-id-5015899.html