Add munin plugin for send-receive timing

(imported from commit e2ae0775379ce59ab43213e68ade4d3f88b578e6)
This commit is contained in:
Leo Franchi 2013-01-28 15:59:27 -05:00
parent 6e3e6f00bd
commit 6e9b8d895c
4 changed files with 262 additions and 106 deletions

View File

@ -0,0 +1,137 @@
#!/usr/bin/env python
"""
Script to provide information about send-receive times.
It supports both munin and nagios outputs.
It talks to the server only through the Humbug API, as the default
hamlet and othello users; it does not use the Django ORM.
"""
import datetime
import os
import sys
import optparse
import random
def total_seconds(timedelta):
    """Return the length of ``timedelta`` in seconds as a float.

    Days and seconds are folded into a whole number of microseconds
    first, and a single float division is done at the end.
    """
    whole_seconds = timedelta.days * 24 * 3600 + timedelta.seconds
    microseconds = whole_seconds * 10**6 + timedelta.microseconds
    return microseconds / 10.**6
# Command-line handling.  At most one positional argument ('config') is
# accepted, and exactly the output modes --munin / --nagios are supported.
usage = """Usage: send-receive.py [options] [config]
'config' is optional, if present will return config info.
Otherwise, returns the output data."""

# Text printed for "--munin config": graph metadata for the sendreceive field.
_MUNIN_CONFIG = """graph_title Send-Receive times
graph_info The number of seconds it takes to send and receive a message from the server
graph_args -u 5 -l 0
graph_vlabel RTT (seconds)
sendreceive.label Send-receive round trip time
sendreceive.warning 3
sendreceive.critical 5"""

parser = optparse.OptionParser(usage=usage)
parser.add_option('--site',
                  dest='site',
                  default="https://humbughq.com",
                  action='store')
parser.add_option('--nagios',
                  dest='nagios',
                  action='store_true')
parser.add_option('--munin',
                  dest='munin',
                  action='store_true')

(options, args) = parser.parse_args()

# Misuse must not exit with status 0: a monitoring system would read that
# as a healthy check.  3 is the Nagios "UNKNOWN" plugin status.
if not options.nagios and not options.munin:
    print('No output options specified! Please provide --munin or --nagios')
    sys.exit(3)

# ``args`` holds only the positional arguments (no program name), so more
# than one of them is already too many for "[config]".
if len(args) > 1:
    print(usage)
    sys.exit(3)

# Munin asks a plugin to describe its graph by passing 'config'.
if options.munin and args and args[0] == 'config':
    print(_MUNIN_CONFIG)
    sys.exit(0)
sys.path.append('/home/humbug/humbug/api')
import humbug
# Process exit status for each state name that report() can be given.
states = dict(OK=0, WARNING=1, CRITICAL=2, UNKNOWN=3)
def report(state, time, msg=None):
    """Print a one-line status and terminate the process.

    state -- key into the module-level ``states`` table; it prefixes the
             output line and selects the exit status.
    time  -- value shown in the default "send time was ..." line; only
             used when ``msg`` is empty.
    msg   -- optional text printed instead of the send-time line.

    Never returns: raises SystemExit with ``states[state]``.  An unknown
    ``state`` raises KeyError after the line has been printed.
    """
    if msg:
        print("%s: %s" % (state, msg))
    else:
        print("%s: send time was %s" % (state, time))
    # sys.exit() rather than the bare exit() helper: the latter is only
    # installed by the site module and is meant for interactive sessions.
    sys.exit(states[state])
def send_humbug(sender, message, nagios):
    """Send ``message`` through ``sender`` and flag a failed send.

    sender  -- client object exposing ``send_message(message)``.
    message -- request passed to ``send_message`` unchanged.
    nagios  -- when true, a non-"success" result is reported as CRITICAL
               via report(), which terminates the process.

    Returns None.  When ``nagios`` is false a failed send is ignored here.
    """
    # NOTE(review): without --nagios a failed send goes unreported, so the
    # caller keeps polling for a message that was never delivered -- confirm
    # this is intended.
    result = sender.send_message(message)
    if result["result"] != "success" and nagios:
        # Pass the text as ``msg``: report()'s second positional argument is
        # the send time, which would otherwise print this error as
        # "send time was Error sending Humbug, ...".
        report("CRITICAL", None,
               msg="Error sending Humbug, args were: %s, %s" % (message, result))
def get_humbug(recipient, max_message_id):
    """Fetch messages for ``recipient``, passing ``max_message_id`` as 'last'.

    The id is stringified into the request, and the 'messages' entry of
    the response is returned.
    """
    request = {'last': str(max_message_id)}
    response = recipient.get_messages(request)
    return response['messages']
# Main flow: send one private message from hamlet to othello, poll as
# othello until it shows up, and report the elapsed time.

# hamlet and othello are default users
sender = "hamlet@humbughq.com"
recipient = "othello@humbughq.com"

# NOTE(review): API keys are committed in source; presumably acceptable only
# because these are the default users -- confirm.
humbug_sender = humbug.Client(
    email=sender,
    api_key="dfe1c934d555f4b9538d0d4cfd3069c2",
    verbose=True,
    client="test: Humbug API",
    site=options.site)

humbug_recipient = humbug.Client(
    email=recipient,
    api_key="4e5d97591bec64bf57d2698ffbb563e3",
    verbose=True,
    client="test: Humbug API",
    site=options.site)

# Polling starts from the recipient's current max_message_id.
max_message_id = humbug_recipient.get_profile().get('max_message_id')
# Random payload so this run's message can be recognised by its content.
msg_to_send = str(random.getrandbits(64))
time_start = datetime.datetime.now()

send_humbug(humbug_sender, {
    "type": 'private',
    "content": msg_to_send,
    "subject": "time to send",
    "to": recipient,
}, options.nagios)

msg_content = []

while msg_to_send not in msg_content:
    messages = get_humbug(humbug_recipient, max_message_id)
    time_diff = datetime.datetime.now() - time_start

    # Advance the cursor so the next poll does not check the same messages
    # every time.  max() of an empty sequence raises ValueError, so keep
    # the previous cursor if a poll returns no messages.
    if messages:
        max_message_id = max(msg['id'] for msg in messages)
    msg_content = [m['content'] for m in messages]

    if options.nagios:
        # report() exits, so the stricter threshold has to be tested first;
        # otherwise anything over 6 seconds is already caught by the
        # 3 second check and CRITICAL can never be reported.
        if time_diff.seconds > 6:
            report('CRITICAL', time_diff)
        if time_diff.seconds > 3:
            report('WARNING', time_diff)

if options.munin:
    print("sendreceive.value %s" % total_seconds(time_diff))
elif options.nagios:
    report('OK', time_diff)

View File

@ -0,0 +1,2 @@
#!/bin/sh
# Munin plugin wrapper: runs the send-receive check in --munin mode and
# forwards the plugin arguments (e.g. "config") to it.
# "$@" is quoted so each argument is passed through verbatim, with no word
# splitting or glob expansion, and expands to nothing when none is given.
/home/humbug/humbug/api/humbug/bots/check_send_receive.py --munin "$@"

View File

@ -0,0 +1,123 @@
# This file is used to configure how the plugins are invoked.
# Place in /etc/munin/plugin-conf.d/ or corresponding directory.
#
# PLEASE NOTE: Changes in the plugin-conf.d directory are only
# read at munin-node startup, so restart at any changes.
#
# user <user> # Set the user to run the plugin as.
# group <group> # Set the group to run the plugin as.
# command <command> # Run <command> instead of the plugin. %c expands to
# what would normally be run.
# env.<variable> <value> # Sets <variable> in the plugin's environment, see the
# individual plugins to find out which variables they
# care about.
[amavis]
group adm
env.MUNIN_MKTEMP /bin/mktemp -p /tmp/ $1
env.amavislog /var/log/mail.info
[apt]
user root
[courier_mta_mailqueue]
group daemon
[courier_mta_mailstats]
group adm
[courier_mta_mailvolume]
group adm
[cps*]
user root
[df*]
env.exclude none unknown iso9660 squashfs udf romfs ramfs debugfs
env.warning 92
env.critical 98
[exim_mailqueue]
group adm, (Debian-exim)
[exim_mailstats]
group adm, (Debian-exim)
env.logdir /var/log/exim4/
env.logname mainlog
[fw_conntrack]
user root
[fw_forwarded_local]
user root
[hddtemp_smartctl]
user root
[hddtemp2]
user root
[if_*]
user root
[if_err_*]
user nobody
[ip_*]
user root
[ipmi_*]
user root
[mysql*]
user root
env.mysqlopts --defaults-file=/etc/mysql/debian.cnf
env.mysqluser debian-sys-maint
env.mysqlconnection DBI:mysql:mysql;mysql_read_default_file=/etc/mysql/debian.cnf
[postfix_mailqueue]
user postfix
[postfix_mailstats]
group adm
[postfix_mailvolume]
group adm
env.logfile mail.log
[smart_*]
user root
[vlan*]
user root
[ejabberd*]
user ejabberd
env.statuses available away chat xa
env.days 1 7 30
[dhcpd3]
user root
env.leasefile /var/lib/dhcp3/dhcpd.leases
env.configfile /etc/dhcp3/dhcpd.conf
[jmx_*]
env.ip 127.0.0.1
env.port 5400
[samba]
user root
[munin_stats]
user munin
group munin
[postgres_*]
user postgres
env.PGUSER postgres
env.PGPORT 5432
# Run the humbug_send_receive plugin as the humbug user and group.
[humbug_send_receive]
user humbug
group humbug

View File

@ -1,106 +0,0 @@
#!/usr/bin/env python
"""
Nagios plugin to check that messages take no longer than 15 seconds to
be received after being sent on Humbug.
It must be run on a machine that is using the live database for the
Django ORM.
"""
import datetime
import os
import sys
import optparse
# Command-line handling: the only option is --site, the server to test
# against, which defaults to https://humbughq.com.
usage = "usage: check_send_receive_time [options]"
parser = optparse.OptionParser(usage=usage)
parser.add_option(
    '--site',
    action='store',
    dest='site',
    default="https://humbughq.com",
)
options, args = parser.parse_args()
sys.path.append('/home/humbug/humbug/api')
import humbug
# Until we run scripts like this with a packaged and installed API, or
# rename the top-level humbug directory to something that doesn't
# conflict with an API module, we have to use hacks like this to be
# able to import both api.humbug and the top level humbug for
# humbug.settings.
del sys.modules["humbug"]
sys.path.insert(0, '/home/humbug/humbug')
os.environ['DJANGO_SETTINGS_MODULE'] = "humbug.settings"
from zephyr.models import UserProfile
# Process exit status for each state name that report() can be given.
states = dict(OK=0, WARNING=1, CRITICAL=2, UNKNOWN=3)
def report(state, time, msg=None):
    """Print a one-line status and terminate the process.

    state -- key into the module-level ``states`` table; it prefixes the
             output line and selects the exit status.
    time  -- value shown in the default "send time was ..." line; only
             used when ``msg`` is empty.
    msg   -- optional text printed instead of the send-time line.

    Never returns: raises SystemExit with ``states[state]``.  An unknown
    ``state`` raises KeyError after the line has been printed.
    """
    if msg:
        print("%s: %s" % (state, msg))
    else:
        print("%s: send time was %s" % (state, time))
    # sys.exit() rather than the bare exit() helper: the latter is only
    # installed by the site module and is meant for interactive sessions.
    sys.exit(states[state])
def send_humbug(sender, message):
    """Send ``message`` through ``sender``; report CRITICAL if it fails.

    sender  -- client object exposing ``send_message(message)``.
    message -- request passed to ``send_message`` unchanged.

    Returns None on success.  On any other result report() is called,
    which terminates the process.
    """
    result = sender.send_message(message)
    if result["result"] != "success":
        # Pass the text as ``msg``: report()'s second positional argument is
        # the send time, which would otherwise print this error as
        # "send time was Error sending Humbug, ...".
        report("CRITICAL", None,
               msg="Error sending Humbug, args were: %s, %s" % (message, result))
def get_humbug(recipient, max_message_id):
    """Fetch messages for ``recipient``, passing ``max_message_id`` as 'last'.

    The id is stringified into the request, and the 'messages' entry of
    the response is returned.
    """
    request = {'last': str(max_message_id)}
    response = recipient.get_messages(request)
    return response['messages']
# Main flow: send one private message from Hamlet to Othello, poll as
# Othello until it shows up, and report the elapsed time.

# Hamlet and Othello are default users
sender = UserProfile.objects.get(user__email="hamlet@humbughq.com")
recipient = UserProfile.objects.get(user__email="othello@humbughq.com")

humbug_sender = humbug.Client(
    email=sender.user.email,
    api_key=sender.api_key,
    verbose=True,
    client="test: Humbug API",
    site=options.site)

humbug_recipient = humbug.Client(
    email=recipient.user.email,
    api_key=recipient.api_key,
    verbose=True,
    client="test: Humbug API",
    site=options.site)

# Polling starts from the recipient's current max_message_id.
max_message_id = humbug_recipient.get_profile().get('max_message_id')

# This msg could be randomly generated every time, may be better to do so.
msg_to_send = "Testing time to send and receive a message."
time_start = datetime.datetime.now()

send_humbug(humbug_sender, {
    "type": 'private',
    "content": msg_to_send,
    "subject": "time to send",
    "to": recipient.user.email,
})

msg_content = []

while msg_to_send not in msg_content:
    messages = get_humbug(humbug_recipient, max_message_id)
    time_diff = datetime.datetime.now() - time_start

    # Advance the cursor so the next poll does not check the same messages
    # every time.  max() of an empty sequence raises ValueError, so keep
    # the previous cursor if a poll returns no messages.
    if messages:
        max_message_id = max(msg['id'] for msg in messages)
    msg_content = [m['content'] for m in messages]

    # report() exits, so the stricter threshold has to be tested first;
    # otherwise anything over 6 seconds is already caught by the
    # 3 second check and CRITICAL can never be reported.
    if time_diff.seconds > 6:
        report('CRITICAL', time_diff)
    if time_diff.seconds > 3:
        report('WARNING', time_diff)

report('OK', time_diff)