#!/usr/bin/env perl
|
|||
|
# -*-mode:cperl; indent-tabs-mode: nil-*-
|
|||
|
|
|||
|
## Perform many different checks against Postgres databases.
|
|||
|
## Designed primarily as a Nagios script.
|
|||
|
## Run with --help for a summary.
|
|||
|
##
|
|||
|
## Greg Sabino Mullane <greg@endpoint.com>
|
|||
|
## End Point Corporation http://www.endpoint.com/
|
|||
|
## BSD licensed, see complete license at bottom of this script
|
|||
|
## The latest version can be found at:
|
|||
|
## http://www.bucardo.org/check_postgres/
|
|||
|
##
|
|||
|
## See the HISTORY section for other contributors
|
|||
|
|
|||
|
package check_postgres;
|
|||
|
|
|||
|
use 5.006001;
|
|||
|
use strict;
|
|||
|
use warnings;
|
|||
|
use utf8;
|
|||
|
use Getopt::Long qw/GetOptions/;
|
|||
|
Getopt::Long::Configure(qw/ no_ignore_case pass_through /);
|
|||
|
use File::Basename qw/basename/;
|
|||
|
use File::Temp qw/tempfile tempdir/;
|
|||
|
File::Temp->safe_level( File::Temp::MEDIUM );
|
|||
|
use Cwd;
|
|||
|
use Data::Dumper qw/Dumper/;
|
|||
|
$Data::Dumper::Varname = 'POSTGRES';
|
|||
|
$Data::Dumper::Indent = 2;
|
|||
|
$Data::Dumper::Useqq = 1;
|
|||
|
|
|||
|
our $VERSION = '2.19.0';
|
|||
|
|
|||
|
use vars qw/ %opt $PSQL $res $COM $SQL $db /;
|
|||
|
|
|||
|
## Which user to connect as if --dbuser is not given
|
|||
|
$opt{defaultuser} = 'postgres';
|
|||
|
|
|||
|
## Which port to connect to if --dbport is not given
|
|||
|
$opt{defaultport} = 5432;
|
|||
|
|
|||
|
## What type of output to use by default
|
|||
|
our $DEFAULT_OUTPUT = 'nagios';
|
|||
|
|
|||
|
## If psql is not in your path, it is recommended to hardcode it here,
|
|||
|
## as an alternative to the --PSQL option
|
|||
|
$PSQL = '';
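## For example (illustrative path only -- adjust to your own installation):
## $PSQL = '/usr/lib/postgresql/bin/psql';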
|
|||
|
|
|||
|
## If this is true, $opt{PSQL} is disabled for security reasons
|
|||
|
our $NO_PSQL_OPTION = 1;
|
|||
|
|
|||
|
## If true, we show how long each query took by default. Requires Time::HiRes to be installed.
|
|||
|
$opt{showtime} = 1;
|
|||
|
|
|||
|
## If true, we show "after the pipe" statistics
|
|||
|
$opt{showperf} = 1;
|
|||
|
|
|||
|
## Default time display format, used for last_vacuum and last_analyze
|
|||
|
our $SHOWTIME = 'HH24:MI FMMonth DD, YYYY';
|
|||
|
|
|||
|
## Always prepend 'postgres_' to the name of the service in the output string
|
|||
|
our $FANCYNAME = 1;
|
|||
|
|
|||
|
## Change the service name to uppercase
|
|||
|
our $YELLNAME = 1;
|
|||
|
|
|||
|
## Preferred order of ways to fetch pages for new_version checks
|
|||
|
our $get_method_timeout = 30;
|
|||
|
our @get_methods = (
|
|||
|
"GET -t $get_method_timeout -H 'Pragma: no-cache'",
|
|||
|
"wget --quiet --timeout=$get_method_timeout --no-cache -O -",
|
|||
|
"curl --silent --max-time=$get_method_timeout -H 'Pragma: no-cache'",
|
|||
|
"fetch -q -T $get_method_timeout -o -",
|
|||
|
"lynx --connect-timeout=$get_method_timeout --dump",
|
|||
|
'links -dump',
|
|||
|
);
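## With the default timeout above, the first entry expands to, for example:
##   GET -t 30 -H 'Pragma: no-cache'
## The URL to fetch is appended when the command is actually run; the list can
## be narrowed to a single method with the --get_method option handled below.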
|
|||
|
|
|||
|
## Nothing below this line should need to be changed for normal usage.
|
|||
|
## If you do find yourself needing to change something,
|
|||
|
## please email the author as it probably indicates something
|
|||
|
## that could be made into a command-line option or moved above.
|
|||
|
|
|||
|
## Messages. Translations always welcome
|
|||
|
## Items without a leading tab still need translating
|
|||
|
## no critic (RequireInterpolationOfMetachars)
|
|||
|
our %msg = (
|
|||
|
'en' => {
|
|||
|
'address' => q{address},
|
|||
|
'age' => q{age},
|
|||
|
'backends-fatal' => q{Could not connect: too many connections},
|
|||
|
'backends-mrtg' => q{DB=$1 Max connections=$2},
|
|||
|
'backends-msg' => q{$1 of $2 connections ($3%)},
|
|||
|
'backends-nomax' => q{Could not determine max_connections},
|
|||
|
'backends-oknone' => q{No connections},
|
|||
|
'backends-po' => q{sorry, too many clients already},
|
|||
|
'backends-users' => q{$1 for number of users must be a number or percentage},
|
|||
|
'bloat-index' => q{(db $1) index $2 rows:$3 pages:$4 shouldbe:$5 ($6X) wasted bytes:$7 ($8)},
|
|||
|
'bloat-nomin' => q{no relations meet the minimum bloat criteria},
|
|||
|
'bloat-table' => q{(db $1) table $2.$3 rows:$4 pages:$5 shouldbe:$6 ($7X) wasted size:$8 ($9)},
|
|||
|
'bug-report' => q{Please report these details to check_postgres@bucardo.org:},
|
|||
|
'checkcluster-id' => q{Database system identifier:},
|
|||
|
'checkcluster-msg' => q{cluster_id: $1},
|
|||
|
'checkcluster-nomrtg'=> q{Must provide a number via the --mrtg option},
|
|||
|
'checkmode-prod' => q{in production},
|
|||
|
'checkmode-recovery' => q{in archive recovery},
|
|||
|
'checkmode-state' => q{Database cluster state:},
|
|||
|
'checkpoint-baddir' => q{Invalid data_directory: "$1"},
|
|||
|
'checkpoint-baddir2' => q{pg_controldata could not read the given data directory: "$1"},
|
|||
|
'checkpoint-badver' => q{Failed to run pg_controldata - probably the wrong version ($1)},
|
|||
|
'checkpoint-badver2' => q{Failed to run pg_controldata - is it the correct version?},
|
|||
|
'checkpoint-nodir' => q{Must supply a --datadir argument or set the PGDATA environment variable},
|
|||
|
'checkpoint-nodp' => q{Must install the Perl module Date::Parse to use the checkpoint action},
|
|||
|
'checkpoint-noparse' => q{Unable to parse pg_controldata output: "$1"},
|
|||
|
'checkpoint-noregex' => q{Unable to find the regex for this check},
|
|||
|
'checkpoint-nosys' => q{Could not call pg_controldata: $1},
|
|||
|
'checkpoint-ok' => q{Last checkpoint was 1 second ago},
|
|||
|
'checkpoint-ok2' => q{Last checkpoint was $1 seconds ago},
|
|||
|
'checkpoint-po' => q{Time of latest checkpoint:},
|
|||
|
'checksum-msg' => q{checksum: $1},
|
|||
|
'checksum-nomd' => q{Must install the Perl module Digest::MD5 to use the checksum action},
|
|||
|
'checksum-nomrtg' => q{Must provide a checksum via the --mrtg option},
|
|||
|
'custom-invalid' => q{Invalid format returned by custom query},
|
|||
|
'custom-norows' => q{No rows returned},
|
|||
|
'custom-nostring' => q{Must provide a query string},
|
|||
|
'database' => q{database},
|
|||
|
'dbsize-version' => q{Target database must be version 8.1 or higher to run the database_size action},
|
|||
|
'die-action-version' => q{Cannot run "$1": server version must be >= $2, but is $3},
|
|||
|
'die-badtime' => q{Value for '$1' must be a valid time. Examples: -$2 1s -$2 "10 minutes"},
|
|||
|
'die-badversion' => q{Invalid version string: $1},
|
|||
|
'die-noset'          => q{Cannot run "$1": $2 is not set to on},
|
|||
|
'die-nosetting' => q{Could not fetch setting '$1'},
|
|||
|
'diskspace-fail' => q{Invalid result from command "$1": $2},
|
|||
|
'diskspace-msg' => q{FS $1 mounted on $2 is using $3 of $4 ($5%)},
|
|||
|
'diskspace-nodata' => q{Could not determine data_directory: are you connecting as a superuser?},
|
|||
|
'diskspace-nodf' => q{Could not find required executable /bin/df},
|
|||
|
'diskspace-nodir' => q{Could not find data directory "$1"},
|
|||
|
'file-noclose' => q{Could not close $1: $2},
|
|||
|
'files' => q{files},
|
|||
|
'fsm-page-highver' => q{Cannot check fsm_pages on servers version 8.4 or greater},
|
|||
|
'fsm-page-msg' => q{fsm page slots used: $1 of $2 ($3%)},
|
|||
|
'fsm-rel-highver' => q{Cannot check fsm_relations on servers version 8.4 or greater},
|
|||
|
'fsm-rel-msg' => q{fsm relations used: $1 of $2 ($3%)},
|
|||
|
'hs-no-role' => q{Not a master/slave couple},
|
|||
|
'hs-no-location' => q{Could not get current xlog location on $1},
|
|||
|
'hs-receive-delay' => q{receive-delay},
|
|||
|
'hs-replay-delay' => q{replay_delay},
|
|||
|
'index' => q{Index},
|
|||
|
'invalid-option' => q{Invalid option},
|
|||
|
'invalid-query' => q{Invalid query returned: $1},
|
|||
|
'language' => q{Language},
|
|||
|
'listener-msg' => q{listeners found: $1},
|
|||
|
'listening' => q{listening},
|
|||
|
'locks-msg' => q{total "$1" locks: $2},
|
|||
|
'locks-msg2' => q{total locks: $1},
|
|||
|
'logfile-bad' => q{Invalid logfile "$1"},
|
|||
|
'logfile-debug' => q{Final logfile: $1},
|
|||
|
'logfile-dne' => q{logfile $1 does not exist!},
|
|||
|
'logfile-fail' => q{fails logging to: $1},
|
|||
|
'logfile-ok' => q{logs to: $1},
|
|||
|
'logfile-openfail' => q{logfile "$1" failed to open: $2},
|
|||
|
'logfile-opt-bad' => q{Invalid logfile option},
|
|||
|
'logfile-seekfail' => q{Seek on $1 failed: $2},
|
|||
|
'logfile-stderr' => q{Logfile output has been redirected to stderr: please provide a filename},
|
|||
|
'logfile-syslog' => q{Database is using syslog, please specify path with --logfile option (fac=$1)},
|
|||
|
'mode-standby' => q{Server in standby mode},
|
|||
|
'mode' => q{mode},
|
|||
|
'mrtg-fail' => q{Action $1 failed: $2},
|
|||
|
'new-ver-nocver' => q{Could not download version information for $1},
|
|||
|
'new-ver-badver' => q{Could not parse version information for $1},
|
|||
|
'new-ver-dev' => q{Cannot compare versions on development versions: you have $1 version $2},
|
|||
|
'new-ver-nolver' => q{Could not determine local version information for $1},
|
|||
|
'new-ver-ok' => q{Version $1 is the latest for $2},
|
|||
|
'new-ver-warn' => q{Please upgrade to version $1 of $2. You are running $3},
|
|||
|
'new-ver-tt' => q{Your version of $1 ($2) appears to be ahead of the current release! ($3)},
|
|||
|
'no-db' => q{No databases},
|
|||
|
'no-match-db' => q{No matching databases found due to exclusion/inclusion options},
|
|||
|
'no-match-fs' => q{No matching file systems found due to exclusion/inclusion options},
|
|||
|
'no-match-rel' => q{No matching relations found due to exclusion/inclusion options},
|
|||
|
'no-match-set' => q{No matching settings found due to exclusion/inclusion options},
|
|||
|
'no-match-table' => q{No matching tables found due to exclusion/inclusion options},
|
|||
|
'no-match-user' => q{No matching entries found due to user exclusion/inclusion options},
|
|||
|
'no-parse-psql' => q{Could not parse psql output!},
|
|||
|
'no-time-hires' => q{Cannot find Time::HiRes, needed if 'showtime' is true},
|
|||
|
'opt-output-invalid' => q{Invalid output: must be 'nagios' or 'mrtg' or 'simple' or 'cacti'},
|
|||
|
'opt-psql-badpath' => q{Invalid psql argument: must be full path to a file named psql},
|
|||
|
'opt-psql-noexec' => q{The file "$1" does not appear to be executable},
|
|||
|
'opt-psql-noexist' => q{Cannot find given psql executable: $1},
|
|||
|
'opt-psql-nofind' => q{Could not find a suitable psql executable},
|
|||
|
'opt-psql-nover' => q{Could not determine psql version},
|
|||
|
'opt-psql-restrict' => q{Cannot use the --PSQL option when NO_PSQL_OPTION is on},
|
|||
|
'pgbouncer-pool' => q{Pool=$1 $2=$3},
|
|||
|
'pgb-backends-mrtg' => q{DB=$1 Max connections=$2},
|
|||
|
'pgb-backends-msg' => q{$1 of $2 connections ($3%)},
|
|||
|
'pgb-backends-none' => q{No connections},
|
|||
|
'pgb-backends-users' => q{$1 for number of users must be a number or percentage},
|
|||
|
'PID' => q{PID},
|
|||
|
'port' => q{port},
|
|||
|
'preptxn-none' => q{No prepared transactions found},
|
|||
|
'psa-disabled' => q{No queries - is stats_command_string or track_activities off?},
|
|||
|
'psa-noexact' => q{Unknown error},
|
|||
|
'psa-nosuper' => q{No matches - please run as a superuser},
|
|||
|
'qtime-count-msg' => q{Total queries: $1},
|
|||
|
'qtime-count-none' => q{not more than $1 queries},
|
|||
|
'qtime-for-msg' => q{$1 queries longer than $2s, longest: $3s$4 $5},
|
|||
|
'qtime-msg' => q{longest query: $1s$2 $3},
|
|||
|
'qtime-none' => q{no queries},
|
|||
|
'queries' => q{queries},
|
|||
|
'query-time' => q{query_time},
|
|||
|
'range-badcs' => q{Invalid '$1' option: must be a checksum},
|
|||
|
'range-badlock' => q{Invalid '$1' option: must be number of locks, or "type1=#;type2=#"},
|
|||
|
'range-badpercent' => q{Invalid '$1' option: must be a percentage},
|
|||
|
'range-badpercsize' => q{Invalid '$1' option: must be a size or a percentage},
|
|||
|
'range-badsize' => q{Invalid size for '$1' option},
|
|||
|
'range-badtype' => q{validate_range called with unknown type '$1'},
|
|||
|
'range-badversion' => q{Invalid string for '$1' option: $2},
|
|||
|
'range-cactionly' => q{This action is for cacti use only and takes no warning or critical arguments},
|
|||
|
'range-int' => q{Invalid argument for '$1' option: must be an integer},
|
|||
|
'range-int-pos' => q{Invalid argument for '$1' option: must be a positive integer},
|
|||
|
'range-neg-percent' => q{Cannot specify a negative percentage!},
|
|||
|
'range-none' => q{No warning or critical options are needed},
|
|||
|
'range-noopt-both' => q{Must provide both 'warning' and 'critical' options},
|
|||
|
'range-noopt-one' => q{Must provide a 'warning' or 'critical' option},
|
|||
|
'range-noopt-only' => q{Can only provide 'warning' OR 'critical' option},
|
|||
|
'range-noopt-orboth' => q{Must provide a 'warning' option, a 'critical' option, or both},
|
|||
|
'range-noopt-size' => q{Must provide a warning and/or critical size},
|
|||
|
'range-nosize' => q{Must provide a warning and/or critical size},
|
|||
|
'range-notime' => q{Must provide a warning and/or critical time},
|
|||
|
'range-seconds' => q{Invalid argument to '$1' option: must be number of seconds},
|
|||
|
'range-version' => q{must be in the format X.Y or X.Y.Z, where X is the major version number, },
|
|||
|
'range-warnbig' => q{The 'warning' option cannot be greater than the 'critical' option},
|
|||
|
'range-warnbigsize' => q{The 'warning' option ($1 bytes) cannot be larger than the 'critical' option ($2 bytes)},
|
|||
|
'range-warnbigtime' => q{The 'warning' option ($1 s) cannot be larger than the 'critical' option ($2 s)},
|
|||
|
'range-warnsmall' => q{The 'warning' option cannot be less than the 'critical' option},
|
|||
|
'range-nointfortime' => q{Invalid argument for '$1' options: must be an integer, time or integer for time},
|
|||
|
'relsize-msg-ind' => q{largest index is "$1": $2},
|
|||
|
'relsize-msg-reli' => q{largest relation is index "$1": $2},
|
|||
|
'relsize-msg-relt' => q{largest relation is table "$1": $2},
|
|||
|
'relsize-msg-tab' => q{largest table is "$1": $2},
|
|||
|
'rep-badarg' => q{Invalid repinfo argument: expected 6 comma-separated values},
|
|||
|
'rep-duh' => q{Makes no sense to test replication with same values},
|
|||
|
'rep-fail' => q{Row not replicated to slave $1},
|
|||
|
'rep-noarg' => q{Need a repinfo argument},
|
|||
|
'rep-norow' => q{Replication source row not found: $1},
|
|||
|
'rep-noslaves' => q{No slaves found},
|
|||
|
'rep-notsame' => q{Cannot test replication: values are not the same},
|
|||
|
'rep-ok' => q{Row was replicated},
|
|||
|
'rep-sourcefail' => q{Source update failed},
|
|||
|
'rep-timeout' => q{Row was not replicated. Timeout: $1},
|
|||
|
'rep-unknown' => q{Replication check failed},
|
|||
|
'rep-wrongvals' => q{Cannot test replication: values are not the right ones ($1 not $2 nor $3)},
|
|||
|
'runcommand-err' => q{Unknown error inside of the "run_command" function},
|
|||
|
'runcommand-nodb' => q{No target databases could be found},
|
|||
|
'runcommand-nodupe' => q{Could not dupe STDERR},
|
|||
|
'runcommand-noerr' => q{Could not open STDERR?!},
|
|||
|
'runcommand-nosys' => q{System call failed with a $1},
|
|||
|
'runcommand-pgpass' => q{Created temporary pgpass file $1},
|
|||
|
'runcommand-timeout' => q{Command timed out! Consider boosting --timeout higher than $1},
|
|||
|
'runtime-badmrtg' => q{invalid queryname?},
|
|||
|
'runtime-badname' => q{Invalid queryname option: must be a simple view name},
|
|||
|
'runtime-msg' => q{query runtime: $1 seconds},
|
|||
|
'schema' => q{Schema},
|
|||
|
'ss-createfile' => q{Created file $1},
|
|||
|
'ss-different' => q{"$1" is different:},
|
|||
|
'ss-existson' => q{Exists on:},
|
|||
|
'ss-failed' => q{Databases were different. Items not matched: $1},
|
|||
|
'ss-matched' => q{All databases have identical items},
|
|||
|
'ss-missingon' => q{Missing on:},
|
|||
|
'ss-noexist' => q{$1 "$2" does not exist on all databases:},
|
|||
|
'ss-notset' => q{"$1" is not set on all databases:},
|
|||
|
'ss-suffix' => q{Error: cannot use suffix unless looking at time-based schemas},
|
|||
|
'seq-die' => q{Could not determine information about sequence $1},
|
|||
|
'seq-msg' => q{$1=$2% (calls left=$3)},
|
|||
|
'seq-none' => q{No sequences found},
|
|||
|
'size' => q{size},
|
|||
|
'slony-noschema' => q{Could not determine the schema for Slony},
|
|||
|
'slony-nonumber' => q{Call to sl_status did not return a number},
|
|||
|
'slony-lagtime' => q{Slony lag time: $1},
|
|||
|
'symlink-create' => q{Created "$1"},
|
|||
|
'symlink-done' => q{Not creating "$1": $2 already linked to "$3"},
|
|||
|
'symlink-exists' => q{Not creating "$1": $2 file already exists},
|
|||
|
'symlink-fail1' => q{Failed to unlink "$1": $2},
|
|||
|
'symlink-fail2' => q{Could not symlink $1 to $2: $3},
|
|||
|
'symlink-name' => q{This command will not work unless the program has the word "postgres" in it},
|
|||
|
'symlink-unlink' => q{Unlinking "$1":$2 },
|
|||
|
'table' => q{Table},
|
|||
|
'testmode-end' => q{END OF TEST MODE},
|
|||
|
'testmode-fail' => q{Connection failed: $1 $2},
|
|||
|
'testmode-norun' => q{Cannot run "$1" on $2: version must be >= $3, but is $4},
|
|||
|
'testmode-noset' => q{Cannot run "$1" on $2: $3 is not set to on},
|
|||
|
'testmode-nover' => q{Could not find version for $1},
|
|||
|
'testmode-ok' => q{Connection ok: $1},
|
|||
|
'testmode-start' => q{BEGIN TEST MODE},
|
|||
|
'time-day' => q{day},
|
|||
|
'time-days' => q{days},
|
|||
|
'time-hour' => q{hour},
|
|||
|
'time-hours' => q{hours},
|
|||
|
'time-minute' => q{minute},
|
|||
|
'time-minutes' => q{minutes},
|
|||
|
'time-month' => q{month},
|
|||
|
'time-months' => q{months},
|
|||
|
'time-second' => q{second},
|
|||
|
'time-seconds' => q{seconds},
|
|||
|
'time-week' => q{week},
|
|||
|
'time-weeks' => q{weeks},
|
|||
|
'time-year' => q{year},
|
|||
|
'time-years' => q{years},
|
|||
|
'timesync-diff' => q{diff},
|
|||
|
'timesync-msg' => q{timediff=$1 DB=$2 Local=$3},
|
|||
|
'transactions' => q{transactions},
|
|||
|
'trigger-msg' => q{Disabled triggers: $1},
|
|||
|
'txn-time' => q{transaction_time},
|
|||
|
'txnidle-count-msg' => q{Total idle in transaction: $1},
|
|||
|
'txnidle-count-none' => q{not more than $1 idle in transaction},
|
|||
|
'txnidle-for-msg' => q{$1 idle transactions longer than $2s, longest: $3s$4 $5},
|
|||
|
'txnidle-msg' => q{longest idle in txn: $1s$2 $3},
|
|||
|
'txnidle-none' => q{no idle in transaction},
|
|||
|
'txntime-count-msg' => q{Total transactions: $1},
|
|||
|
'txntime-count-none' => q{not more than $1 transactions},
|
|||
|
'txntime-for-msg' => q{$1 transactions longer than $2s, longest: $3s$4 $5},
|
|||
|
'txntime-msg' => q{longest txn: $1s$2 $3},
|
|||
|
'txntime-none' => q{No transactions},
|
|||
|
'txnwrap-cbig' => q{The 'critical' value must be less than 2 billion},
|
|||
|
'txnwrap-wbig' => q{The 'warning' value must be less than 2 billion},
|
|||
|
'unknown-error' => q{Unknown error},
|
|||
|
'usage' => qq{\nUsage: \$1 <options>\n Try "\$1 --help" for a complete list of options\n Try "\$1 --man" for the full manual\n},
|
|||
|
'user' => q{User},
|
|||
|
'username' => q{username},
|
|||
|
'vac-nomatch-a' => q{No matching tables have ever been analyzed},
|
|||
|
'vac-nomatch-v' => q{No matching tables have ever been vacuumed},
|
|||
|
'version' => q{version $1},
|
|||
|
'version-badmrtg' => q{Invalid mrtg version argument},
|
|||
|
'version-fail' => q{version $1, but expected $2},
|
|||
|
'version-ok' => q{version $1},
|
|||
|
'wal-numfound' => q{WAL files found: $1},
|
|||
|
'wal-numfound2' => q{WAL "$2" files found: $1},
|
|||
|
},
|
|||
|
'fr' => {
|
|||
|
'address' => q{adresse},
|
|||
|
'age' => q{âge},
|
|||
|
'backends-fatal' => q{N'a pas pu se connecter : trop de connexions},
|
|||
|
'backends-mrtg' => q{DB=$1 Connexions maximum=$2},
|
|||
|
'backends-msg' => q{$1 connexions sur $2 ($3%)},
|
|||
|
'backends-nomax' => q{N'a pas pu déterminer max_connections},
|
|||
|
'backends-oknone' => q{Aucune connexion},
|
|||
|
'backends-po' => q{désolé, trop de clients sont déjà connectés},
|
|||
|
'backends-users' => q{$1 pour le nombre d'utilisateurs doit être un nombre ou un pourcentage},
|
|||
|
'bloat-index' => q{(db $1) index $2 lignes:$3 pages:$4 devrait être:$5 ($6X) octets perdus:$7 ($8)},
|
|||
|
'bloat-nomin' => q{aucune relation n'atteint le critère minimum de fragmentation},
|
|||
|
'bloat-table' => q{(db $1) table $2.$3 lignes:$4 pages:$5 devrait être:$6 ($7X) place perdue:$8 ($9)},
|
|||
|
'bug-report'         => q{Merci de rapporter ces détails à check_postgres@bucardo.org:},
|
|||
|
'checkcluster-id' => q{Database system identifier:},
|
|||
|
'checkcluster-msg' => q{cluster_id: $1},
|
|||
|
'checkcluster-nomrtg'=> q{Must provide a number via the --mrtg option},
|
|||
|
'checkmode-prod' => q{in production},
|
|||
|
'checkmode-recovery' => q{en restauration d'archives},
|
|||
|
'checkmode-state' => q{État de l'instance :},
|
|||
|
'checkpoint-baddir' => q{data_directory invalide : "$1"},
|
|||
|
'checkpoint-baddir2' => q{pg_controldata n'a pas pu lire le répertoire des données indiqué : « $1 »},
|
|||
|
'checkpoint-badver' => q{Échec lors de l'exécution de pg_controldata - probablement la mauvaise version ($1)},
|
|||
|
'checkpoint-badver2' => q{Échec lors de l'exécution de pg_controldata - est-ce la bonne version ?},
|
|||
|
'checkpoint-nodir' => q{Vous devez fournir un argument --datadir ou configurer la variable d'environnement PGDATA},
|
|||
|
'checkpoint-nodp' => q{Vous devez installer le module Perl Date::Parse pour utiliser l'action checkpoint},
|
|||
|
'checkpoint-noparse' => q{Incapable d'analyser le résultat de la commande pg_controldata : "$1"},
|
|||
|
'checkpoint-noregex' => q{La regex pour ce test n'a pas été trouvée},
|
|||
|
'checkpoint-nosys' => q{N'a pas pu appeler pg_controldata : $1},
|
|||
|
'checkpoint-ok' => q{Le dernier CHECKPOINT est survenu il y a une seconde},
|
|||
|
'checkpoint-ok2' => q{Le dernier CHECKPOINT est survenu il y a $1 secondes},
|
|||
|
'checkpoint-po'      => q{Heure du dernier point de contrôle :},
|
|||
|
'checksum-msg' => q{somme de contrôle : $1},
|
|||
|
'checksum-nomd' => q{Vous devez installer le module Perl Digest::MD5 pour utiliser l'action checksum},
|
|||
|
'checksum-nomrtg' => q{Vous devez fournir une somme de contrôle avec l'option --mrtg},
|
|||
|
'custom-invalid' => q{Format invalide renvoyé par la requête personnalisée},
|
|||
|
'custom-norows' => q{Aucune ligne renvoyée},
|
|||
|
'custom-nostring' => q{Vous devez fournir une requête},
|
|||
|
'database' => q{base de données},
|
|||
|
'dbsize-version' => q{La base de données cible doit être une version 8.1 ou ultérieure pour exécuter l'action database_size},
|
|||
|
'die-action-version' => q{Ne peut pas exécuter « $1 » : la version du serveur doit être supérieure ou égale à $2, alors qu'elle est $3},
|
|||
|
'die-badtime' => q{La valeur de « $1 » doit être une heure valide. Par exemple, -$2 1s -$2 « 10 minutes »},
|
|||
|
'die-badversion' => q{Version invalide : $1},
|
|||
|
'die-noset'          => q{Ne peut pas exécuter « $1 » : $2 n'est pas activé},
|
|||
|
'die-nosetting' => q{N'a pas pu récupérer le paramètre « $1 »},
|
|||
|
'diskspace-fail' => q{Résultat invalide pour la commande « $1 » : $2},
|
|||
|
'diskspace-msg' => q{Le système de fichiers $1 monté sur $2 utilise $3 sur $4 ($5%)},
|
|||
|
'diskspace-nodata' => q{N'a pas pu déterminer data_directory : êtes-vous connecté en tant que super-utilisateur ?},
|
|||
|
'diskspace-nodf' => q{N'a pas pu trouver l'exécutable /bin/df},
|
|||
|
'diskspace-nodir' => q{N'a pas pu trouver le répertoire des données « $1 »},
|
|||
|
'files' => q{fichiers},
|
|||
|
'file-noclose' => q{N'a pas pu fermer $1 : $2},
|
|||
|
'fsm-page-highver' => q{Ne peut pas vérifier fsm_pages sur des serveurs en version 8.4 ou ultérieure},
|
|||
|
'fsm-page-msg' => q{emplacements de pages utilisés par la FSM : $1 sur $2 ($3%)},
|
|||
|
'fsm-rel-highver' => q{Ne peut pas vérifier fsm_relations sur des serveurs en version 8.4 ou ultérieure},
|
|||
|
'fsm-rel-msg' => q{relations tracées par la FSM : $1 sur $2 ($3%)},
|
|||
|
'hs-no-role'         => q{Pas de couple maître/esclave},
|
|||
|
'hs-no-location' => q{N'a pas pu obtenir l'emplacement courant dans le journal des transactions sur $1},
|
|||
|
'hs-receive-delay' => q{délai de réception},
|
|||
|
'hs-replay-delay' => q{délai de rejeu},
|
|||
|
'index' => q{Index},
|
|||
|
'invalid-option' => q{Option invalide},
|
|||
|
'invalid-query' => q{Une requête invalide a renvoyé : $1},
|
|||
|
'language' => q{Langage},
|
|||
|
'listener-msg' => q{processus LISTEN trouvés : $1},
|
|||
|
'listening' => q{en écoute},
|
|||
|
'locks-msg' => q{total des verrous « $1 » : $2},
|
|||
|
'locks-msg2' => q{total des verrous : $1},
|
|||
|
'logfile-bad' => q{Option logfile invalide « $1 »},
|
|||
|
'logfile-debug' => q{Journal applicatif final : $1},
|
|||
|
'logfile-dne' => q{le journal applicatif $1 n'existe pas !},
|
|||
|
'logfile-fail' => q{échec pour tracer dans : $1},
|
|||
|
'logfile-ok' => q{trace dans : $1},
|
|||
|
'logfile-openfail' => q{échec pour l'ouverture du journal applicatif « $1 » : $2},
|
|||
|
'logfile-opt-bad' => q{Option logfile invalide},
|
|||
|
'logfile-seekfail' => q{Échec de la recherche dans $1 : $2},
|
|||
|
'logfile-stderr'     => q{La sortie des traces a été redirigée vers stderr : merci de fournir un nom de fichier},
|
|||
|
'logfile-syslog'     => q{La base de données utilise syslog, merci de spécifier le chemin avec l'option --logfile (fac=$1)},
|
|||
|
'mode-standby' => q{Serveur en mode standby},
|
|||
|
'mode' => q{mode},
|
|||
|
'mrtg-fail' => q{Échec de l'action $1 : $2},
|
|||
|
'new-ver-nocver'     => q{N'a pas pu télécharger les informations de version pour $1},
|
|||
|
'new-ver-badver' => q{N'a pas pu analyser les informations de version pour $1},
|
|||
|
'new-ver-dev'        => q{Ne peut pas comparer les versions sur des versions de développement : vous avez $1 version $2},
|
|||
|
'new-ver-nolver'     => q{N'a pas pu déterminer les informations de version locale pour $1},
|
|||
|
'new-ver-ok' => q{La version $1 est la dernière pour $2},
|
|||
|
'new-ver-warn' => q{Merci de mettre à jour vers la version $1 de $2. Vous utilisez actuellement la $3},
|
|||
|
'new-ver-tt'         => q{Votre version de $1 ($2) semble ultérieure à la version courante ! ($3)},
|
|||
|
'no-db' => q{Pas de bases de données},
|
|||
|
'no-match-db' => q{Aucune base de données trouvée à cause des options d'exclusion/inclusion},
|
|||
|
'no-match-fs' => q{Aucun système de fichier trouvé à cause des options d'exclusion/inclusion},
|
|||
|
'no-match-rel' => q{Aucune relation trouvée à cause des options d'exclusion/inclusion},
|
|||
|
'no-match-set' => q{Aucun paramètre trouvé à cause des options d'exclusion/inclusion},
|
|||
|
'no-match-table' => q{Aucune table trouvée à cause des options d'exclusion/inclusion},
|
|||
|
'no-match-user'      => q{Aucune entrée trouvée à cause des options d'exclusion/inclusion},
|
|||
|
'no-parse-psql' => q{N'a pas pu analyser la sortie de psql !},
|
|||
|
'no-time-hires' => q{N'a pas trouvé le module Time::HiRes, nécessaire quand « showtime » est activé},
|
|||
|
'opt-output-invalid' => q{Sortie invalide : doit être 'nagios' ou 'mrtg' ou 'simple' ou 'cacti'},
|
|||
|
'opt-psql-badpath' => q{Argument invalide pour psql : doit être le chemin complet vers un fichier nommé psql},
|
|||
|
'opt-psql-noexec'    => q{Le fichier « $1 » ne paraît pas exécutable},
|
|||
|
'opt-psql-noexist' => q{Ne peut pas trouver l'exécutable psql indiqué : $1},
|
|||
|
'opt-psql-nofind' => q{N'a pas pu trouver un psql exécutable},
|
|||
|
'opt-psql-nover' => q{N'a pas pu déterminer la version de psql},
|
|||
|
'opt-psql-restrict' => q{Ne peut pas utiliser l'option --PSQL si NO_PSQL_OPTION est activé},
|
|||
|
'pgbouncer-pool' => q{Pool=$1 $2=$3},
|
|||
|
'pgb-backends-mrtg' => q{base=$1 connexions max=$2},
|
|||
|
'pgb-backends-msg' => q{$1 connexions sur $2 ($3%)},
|
|||
|
'pgb-backends-none'  => q{Aucune connexion},
|
|||
|
'pgb-backends-users' => q{Le nombre d'utilisateurs, $1, doit être un nombre ou un pourcentage},
|
|||
|
'PID' => q{PID},
|
|||
|
'port' => q{port},
|
|||
|
'preptxn-none' => q{Aucune transaction préparée trouvée},
|
|||
|
'psa-disabled'       => q{Pas de requête - est-ce que stats_command_string ou track_activities sont désactivés ?},
|
|||
|
'psa-noexact' => q{Erreur inconnue},
|
|||
|
'psa-nosuper'        => q{Aucune correspondance - merci de m'exécuter en tant que superutilisateur},
|
|||
|
'qtime-count-msg' => q{Requêtes totales : $1},
|
|||
|
'qtime-count-none' => q{pas plus que $1 requêtes},
|
|||
|
'qtime-for-msg' => q{$1 requêtes plus longues que $2s, requête la plus longue : $3s$4 $5},
|
|||
|
'qtime-msg' => q{requête la plus longue : $1s$2 $3},
|
|||
|
'qtime-none' => q{aucune requête},
|
|||
|
'queries' => q{requêtes},
|
|||
|
'query-time' => q{durée de la requête},
|
|||
|
'range-badcs' => q{Option « $1 » invalide : doit être une somme de contrôle},
|
|||
|
'range-badlock' => q{Option « $1 » invalide : doit être un nombre de verrou ou « type1=#;type2=# »},
|
|||
|
'range-badpercent' => q{Option « $1 » invalide : doit être un pourcentage},
|
|||
|
'range-badpercsize' => q{Option « $1 » invalide : doit être une taille ou un pourcentage},
|
|||
|
'range-badsize' => q{Taille invalide pour l'option « $1 »},
|
|||
|
'range-badtype' => q{validate_range appelé avec un type inconnu « $1 »},
|
|||
|
'range-badversion' => q{Chaîne invalide pour l'option « $1 » : $2},
|
|||
|
'range-cactionly' => q{Cette action est pour cacti seulement et ne prend pas les arguments warning et critical},
|
|||
|
'range-int' => q{Argument invalide pour l'option « $1 » : doit être un entier},
|
|||
|
'range-int-pos' => q{Argument invalide pour l'option « $1 » : doit être un entier positif},
|
|||
|
'range-neg-percent' => q{Ne peut pas indiquer un pourcentage négatif !},
|
|||
|
'range-none' => q{Les options warning et critical ne sont pas nécessaires},
|
|||
|
'range-noopt-both' => q{Doit fournir les options warning et critical},
|
|||
|
'range-noopt-one' => q{Doit fournir une option warning ou critical},
|
|||
|
'range-noopt-only' => q{Peut seulement fournir une option warning ou critical},
|
|||
|
'range-noopt-orboth' => q{Doit fournir une option warning, une option critical ou les deux},
|
|||
|
'range-noopt-size' => q{Doit fournir une taille warning et/ou critical},
|
|||
|
'range-nosize' => q{Doit fournir une taille warning et/ou critical},
|
|||
|
'range-notime' => q{Doit fournir une heure warning et/ou critical},
|
|||
|
'range-seconds' => q{Argument invalide pour l'option « $1 » : doit être un nombre de secondes},
|
|||
|
'range-version' => q{doit être dans le format X.Y ou X.Y.Z, où X est le numéro de version majeure, },
|
|||
|
'range-warnbig' => q{L'option warning ne peut pas être plus grand que l'option critical},
|
|||
|
'range-warnbigsize' => q{L'option warning ($1 octets) ne peut pas être plus grand que l'option critical ($2 octets)},
|
|||
|
'range-warnbigtime' => q{L'option warning ($1 s) ne peut pas être plus grand que l'option critical ($2 s)},
|
|||
|
'range-warnsmall'    => q{L'option warning ne peut pas être plus petite que l'option critical},
|
|||
|
'range-nointfortime' => q{Argument invalide pour l'option '$1' : doit être un entier, une heure ou un entier horaire},
|
|||
|
'relsize-msg-ind' => q{le plus gros index est « $1 » : $2},
|
|||
|
'relsize-msg-reli' => q{la plus grosse relation est l'index « $1 » : $2},
|
|||
|
'relsize-msg-relt' => q{la plus grosse relation est la table « $1 » : $2},
|
|||
|
'relsize-msg-tab' => q{la plus grosse table est « $1 » : $2},
|
|||
|
'rep-badarg' => q{Argument repinfo invalide : 6 valeurs séparées par des virgules attendues},
|
|||
|
'rep-duh' => q{Aucun sens à tester la réplication avec les mêmes valeurs},
|
|||
|
'rep-fail' => q{Ligne non répliquée sur l'esclave $1},
|
|||
|
'rep-noarg' => q{A besoin d'un argument repinfo},
|
|||
|
'rep-norow' => q{Ligne source de la réplication introuvable : $1},
|
|||
|
'rep-noslaves' => q{Aucun esclave trouvé},
|
|||
|
'rep-notsame' => q{Ne peut pas tester la réplication : les valeurs ne sont pas identiques},
|
|||
|
'rep-ok' => q{La ligne a été répliquée},
|
|||
|
'rep-sourcefail' => q{Échec de la mise à jour de la source},
|
|||
|
'rep-timeout' => q{La ligne n'a pas été répliquée. Délai dépassé : $1},
|
|||
|
'rep-unknown' => q{Échec du test de la réplication},
|
|||
|
'rep-wrongvals' => q{Ne peut pas tester la réplication : les valeurs ne sont pas les bonnes (ni $1 ni $2 ni $3)},
|
|||
|
'runcommand-err' => q{Erreur inconnue de la fonction « run_command »},
|
|||
|
'runcommand-nodb' => q{Aucune base de données cible trouvée},
|
|||
|
'runcommand-nodupe'  => q{N'a pas pu dupliquer STDERR},
|
|||
|
'runcommand-noerr' => q{N'a pas pu ouvrir STDERR},
|
|||
|
'runcommand-nosys' => q{Échec de l'appel système avec un $1},
|
|||
|
'runcommand-pgpass' => q{Création du fichier pgpass temporaire $1},
|
|||
|
'runcommand-timeout' => q{Délai épuisé pour la commande ! Essayez d'augmenter --timeout à une valeur plus importante que $1},
|
|||
|
'runtime-badmrtg' => q{queryname invalide ?},
|
|||
|
'runtime-badname'    => q{Option queryname invalide : doit être un simple nom de vue},
|
|||
|
'runtime-msg' => q{durée d'exécution de la requête : $1 secondes},
|
|||
|
'schema' => q{Schéma},
|
|||
|
'ss-createfile' => q{Création du fichier $1},
|
|||
|
'ss-different' => q{"$1" est différent:},
|
|||
|
'ss-existson' => q{Existe sur :},
|
|||
|
'ss-failed' => q{Les bases de données sont différentes. Éléments différents : $1},
|
|||
|
'ss-matched' => q{Les bases de données ont les mêmes éléments},
|
|||
|
'ss-missingon' => q{Manque sur :},
|
|||
|
'ss-noexist' => q{$1 "$2" n'existe pas sur toutes les bases de données :},
|
|||
|
'ss-notset' => q{"$1" n'est pas configuré sur toutes les bases de données :},
|
|||
|
'ss-suffix' => q{Erreur : ne peut pas utiliser le suffixe sauf à rechercher des schémas basés sur l'horloge},
|
|||
|
'size' => q{taille},
|
|||
|
'slony-noschema' => q{N'a pas pu déterminer le schéma de Slony},
|
|||
|
'slony-nonumber' => q{L'appel à sl_status n'a pas renvoyé un numéro},
|
|||
|
'slony-lagtime' => q{Durée de lag de Slony : $1},
|
|||
|
'seq-die' => q{N'a pas pu récupérer d'informations sur la séquence $1},
|
|||
|
'seq-msg' => q{$1=$2% (appels restant=$3)},
|
|||
|
'seq-none'           => q{Aucune séquence trouvée},
|
|||
|
'symlink-create' => q{Création de « $1 »},
|
|||
|
'symlink-done' => q{Création impossible de « $1 »: $2 est déjà lié à "$3"},
|
|||
|
'symlink-exists' => q{Création impossible de « $1 »: le fichier $2 existe déjà},
|
|||
|
'symlink-fail1' => q{Échec de la suppression de « $1 » : $2},
|
|||
|
'symlink-fail2' => q{N'a pas pu supprimer le lien symbolique $1 vers $2 : $3},
|
|||
|
'symlink-name' => q{Cette commande ne fonctionnera pas sauf si le programme contient le mot « postgres »},
|
|||
|
'symlink-unlink'     => q{Suppression de « $1 » :$2 },
|
|||
|
'table' => q{Table},
|
|||
|
'testmode-end' => q{FIN DU MODE DE TEST},
|
|||
|
'testmode-fail' => q{Échec de la connexion : $1 $2},
|
|||
|
'testmode-norun' => q{N'a pas pu exécuter « $1 » sur $2 : la version doit être supérieure ou égale à $3, mais est $4},
|
|||
|
'testmode-noset' => q{N'a pas pu exécuter « $1 » sur $2 : $3 n'est pas activé},
|
|||
|
'testmode-nover' => q{N'a pas pu trouver la version de $1},
|
|||
|
'testmode-ok' => q{Connexion OK : $1},
|
|||
|
'testmode-start' => q{DÉBUT DU MODE DE TEST},
|
|||
|
'time-day' => q{jour},
|
|||
|
'time-days' => q{jours},
|
|||
|
'time-hour' => q{heure},
|
|||
|
'time-hours' => q{heures},
|
|||
|
'time-minute' => q{minute},
|
|||
|
'time-minutes' => q{minutes},
|
|||
|
'time-month' => q{mois},
|
|||
|
'time-months' => q{mois},
|
|||
|
'time-second' => q{seconde},
|
|||
|
'time-seconds' => q{secondes},
|
|||
|
'time-week' => q{semaine},
|
|||
|
'time-weeks' => q{semaines},
|
|||
|
'time-year' => q{année},
|
|||
|
'time-years' => q{années},
|
|||
|
'timesync-diff' => q{diff},
|
|||
|
'timesync-msg' => q{timediff=$1 Base de données=$2 Local=$3},
|
|||
|
'transactions' => q{transactions},
|
|||
|
'trigger-msg' => q{Triggers désactivés : $1},
|
|||
|
'txn-time' => q{durée de la transaction},
|
|||
|
'txnidle-count-msg' => q{Transactions en attente totales : $1},
|
|||
|
'txnidle-count-none' => q{pas plus de $1 transaction en attente},
|
|||
|
'txnidle-for-msg' => q{$1 transactions en attente plus longues que $2s, transaction la plus longue : $3s$4 $5},
|
|||
|
'txnidle-msg' => q{transaction en attente la plus longue : $1s$2 $3},
|
|||
|
'txnidle-none' => q{Aucun processus en attente dans une transaction},
|
|||
|
'txntime-count-msg' => q{Transactions totales : $1},
|
|||
|
'txntime-count-none' => q{pas plus que $1 transactions},
|
|||
|
'txntime-for-msg' => q{$1 transactions plus longues que $2s, transaction la plus longue : $3s$4 $5},
|
|||
|
'txntime-msg' => q{Transaction la plus longue : $1s$2 $3},
|
|||
|
'txntime-none' => q{Aucune transaction},
|
|||
|
'txnwrap-cbig' => q{La valeur critique doit être inférieure à 2 milliards},
|
|||
|
'txnwrap-wbig' => q{La valeur d'avertissement doit être inférieure à 2 milliards},
|
|||
|
'unknown-error' => q{erreur inconnue},
|
|||
|
'usage'              => qq{\nUsage: \$1 <options>\n Essayez « \$1 --help » pour une liste complète des options\n\n},
|
|||
|
'username' => q{nom utilisateur},
|
|||
|
'user' => q{Utilisateur},
|
|||
|
'vac-nomatch-a' => q{Aucune des tables correspondantes n'a eu d'opération ANALYZE},
|
|||
|
'vac-nomatch-v' => q{Aucune des tables correspondantes n'a eu d'opération VACUUM},
|
|||
|
'version' => q{version $1},
|
|||
|
'version-badmrtg' => q{Argument invalide pour la version de mrtg},
|
|||
|
'version-fail' => q{version $1, alors que la version attendue est $2},
|
|||
|
'version-ok' => q{version $1},
|
|||
|
'wal-numfound' => q{Fichiers WAL trouvés : $1},
|
|||
|
'wal-numfound2' => q{Fichiers WAL "$2" trouvés : $1},
|
|||
|
},
|
|||
|
'af' => {
|
|||
|
},
|
|||
|
'cs' => {
|
|||
|
'checkpoint-po'      => q{Čas posledního kontrolního bodu:},
|
|||
|
},
|
|||
|
'de' => {
|
|||
|
'backends-po' => q{tut mir leid, schon zu viele Verbindungen},
|
|||
|
'checkpoint-po' => q{Zeit des letzten Checkpoints:},
|
|||
|
},
|
|||
|
'es' => {
|
|||
|
'backends-po' => q{lo siento, ya tenemos demasiados clientes},
|
|||
|
'checkpoint-po'      => q{Instante de último checkpoint:},
|
|||
|
},
|
|||
|
'fa' => {
|
|||
|
'checkpoint-po' => q{زمان آخرین وارسی:},
|
|||
|
},
|
|||
|
'hr' => {
|
|||
|
'backends-po' => q{nažalost, već je otvoreno previše klijentskih veza},
|
|||
|
},
|
|||
|
'hu' => {
|
|||
|
'checkpoint-po'      => q{A legutóbbi ellenőrzőpont ideje:},
|
|||
|
},
|
|||
|
'it' => {
|
|||
|
'checkpoint-po' => q{Orario ultimo checkpoint:},
|
|||
|
},
|
|||
|
'ja' => {
|
|||
|
'backends-po' => q{現在クライアント数が多すぎます},
|
|||
|
'checkpoint-po' => q{最終チェックポイント時刻:},
|
|||
|
},
|
|||
|
'ko' => {
|
|||
|
'backends-po' => q{최대 동시 접속자 수를 초과했습니다.},
|
|||
|
'checkpoint-po'      => q{마지막 체크포인트 시간:},
|
|||
|
},
|
|||
|
'nb' => {
|
|||
|
'backends-po' => q{beklager, for mange klienter},
|
|||
|
'checkpoint-po' => q{Tidspunkt for nyeste kontrollpunkt:},
|
|||
|
},
|
|||
|
'nl' => {
|
|||
|
},
|
|||
|
'pl' => {
|
|||
|
'checkpoint-po' => q{Czas najnowszego punktu kontrolnego:},
|
|||
|
},
|
|||
|
'pt_BR' => {
|
|||
|
'backends-po' => q{desculpe, muitos clientes conectados},
|
|||
|
'checkpoint-po' => q{Hora do último ponto de controle:},
|
|||
|
},
|
|||
|
'ro' => {
|
|||
|
'checkpoint-po' => q{Timpul ultimului punct de control:},
|
|||
|
},
|
|||
|
'ru' => {
|
|||
|
'backends-po'        => q{извините, уже слишком много клиентов},
|
|||
|
'checkpoint-po'      => q{Время последнего checkpoint:},
|
|||
|
},
|
|||
|
'sk' => {
|
|||
|
'backends-po'        => q{je mi ľúto, je už príliš veľa klientov},
|
|||
|
'checkpoint-po' => q{Čas posledného kontrolného bodu:},
|
|||
|
},
|
|||
|
'sl' => {
|
|||
|
'backends-po'        => q{povezanih je že preveč odjemalcev},
|
|||
|
'checkpoint-po'      => q{Čas zadnje kontrolne točke ............},
|
|||
|
},
|
|||
|
'sv' => {
|
|||
|
'backends-po'        => q{ledsen, för många klienter},
|
|||
|
'checkpoint-po'      => q{Tidpunkt för senaste kontrollpunkt:},
|
|||
|
},
|
|||
|
'ta' => {
|
|||
|
'checkpoint-po' => q{நவீன சோதனை மையத்தின் நேரம்:},
|
|||
|
},
|
|||
|
'tr' => {
|
|||
|
'backends-po' => q{üzgünüm, istemci sayısı çok fazla},
|
|||
|
'checkpoint-po' => q{En son checkpoint'in zamanı:},
|
|||
|
},
|
|||
|
'zh_CN' => {
|
|||
|
'backends-po'        => q{对不起, 已经有太多的客户},
|
|||
|
'checkpoint-po'      => q{最新检查点时间:},
|
|||
|
},
|
|||
|
'zh_TW' => {
|
|||
|
'backends-po' => q{對不起,用戶端過多},
|
|||
|
'checkpoint-po' => q{最新的檢查點時間:},
|
|||
|
},
|
|||
|
);
|
|||
|
## use critic
|
|||
|
|
|||
|
our $lang = $ENV{LC_ALL} || $ENV{LC_MESSAGES} || $ENV{LANG} || 'en';
|
|||
|
$lang = substr($lang,0,2);
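## For example, LC_ALL=fr_FR.UTF-8 yields $lang = 'fr' and selects the French
## messages above; keys missing from a translation are expected to fall back
## to the English ('en') entries.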
|
|||
|
|
|||
|
## Messages are stored in these until the final output via finishup()
|
|||
|
our (%ok, %warning, %critical, %unknown);
|
|||
|
|
|||
|
our $ME = basename($0);
|
|||
|
our $ME2 = 'check_postgres.pl';
|
|||
|
our $USAGE = msg('usage', $ME);
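## Illustration only: msg() fills the $1,$2,... placeholders in the templates
## above with its arguments, so in the 'en' locale a call such as
##   msg('backends-msg', 11, 20, 55)
## is expected to yield "11 of 20 connections (55%)".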
|
|||
|
|
|||
|
## This gets turned on for meta-commands which don't hit a Postgres database
|
|||
|
our $nohost = 0;
|
|||
|
|
|||
|
## Global error string, mostly used for MRTG error handling
|
|||
|
our $ERROR = '';
|
|||
|
|
|||
|
$opt{test} = 0;
|
|||
|
$opt{timeout} = 30;
|
|||
|
|
|||
|
## Look for any rc files to control additional parameters
|
|||
|
## Command line options always overwrite these
|
|||
|
## Format of these files is simply name=val
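## An illustrative ~/.check_postgresrc (example values only):
##   dbhost=db1.example.com
##   dbport=5543
##   dbuser=nagios
##   dbname=postgres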
|
|||
|
|
|||
|
## This option must come before the GetOptions call
|
|||
|
for my $arg (@ARGV) {
|
|||
|
if ($arg eq '--no-check_postgresrc') {
|
|||
|
$opt{'no-check_postgresrc'} = 1;
|
|||
|
last;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Used by same_schema in the find_catalog_info sub
|
|||
|
my %catalog_info = (
|
|||
|
|
|||
|
user => {
|
|||
|
SQL => q{
|
|||
|
SELECT *, usename AS name, quote_ident(usename) AS safeusename
|
|||
|
FROM pg_user},
|
|||
|
deletecols => [ qw{ passwd } ],
|
|||
|
},
|
|||
|
|
|||
|
schema => {
|
|||
|
SQL => q{
|
|||
|
SELECT n.oid, quote_ident(nspname) AS name, quote_ident(usename) AS owner, nspacl
|
|||
|
FROM pg_namespace n
|
|||
|
JOIN pg_user u ON (u.usesysid = n.nspowner)},
|
|||
|
deletecols => [ ],
|
|||
|
exclude => 'temp_schemas',
|
|||
|
},
|
|||
|
language => {
|
|||
|
SQL => q{
|
|||
|
SELECT l.*, lanname AS name, quote_ident(usename) AS owner
|
|||
|
FROM pg_language l
|
|||
|
JOIN pg_user u ON (u.usesysid = l.lanowner)},
|
|||
|
SQL2 => q{
|
|||
|
SELECT l.*, lanname AS name
|
|||
|
FROM pg_language l
|
|||
|
},
|
|||
|
},
|
|||
|
type => {
|
|||
|
SQL => q{
|
|||
|
SELECT t.oid AS oid, t.*, quote_ident(usename) AS owner, quote_ident(nspname) AS schema,
|
|||
|
nspname||'.'||typname AS name
|
|||
|
FROM pg_type t
|
|||
|
JOIN pg_user u ON (u.usesysid = t.typowner)
|
|||
|
JOIN pg_namespace n ON (n.oid = t.typnamespace)
|
|||
|
WHERE t.typtype NOT IN ('b','c')},
|
|||
|
exclude => 'system',
|
|||
|
},
|
|||
|
sequence => {
|
|||
|
SQL => q{
|
|||
|
SELECT c.*, nspname||'.'||relname AS name, quote_ident(usename) AS owner,
|
|||
|
(quote_ident(nspname)||'.'||quote_ident(relname)) AS safename,
|
|||
|
quote_ident(nspname) AS schema
|
|||
|
FROM pg_class c
|
|||
|
JOIN pg_user u ON (u.usesysid = c.relowner)
|
|||
|
JOIN pg_namespace n ON (n.oid = c.relnamespace)
|
|||
|
WHERE c.relkind = 'S'},
|
|||
|
innerSQL => 'SELECT * FROM ROWSAFENAME',
|
|||
|
},
|
|||
|
view => {
|
|||
|
SQL => q{
|
|||
|
SELECT c.*, nspname||'.'||relname AS name, quote_ident(usename) AS owner,
|
|||
|
quote_ident(relname) AS safename, quote_ident(nspname) AS schema,
|
|||
|
TRIM(pg_get_viewdef(c.oid, TRUE)) AS viewdef, spcname AS tablespace
|
|||
|
FROM pg_class c
|
|||
|
JOIN pg_user u ON (u.usesysid = c.relowner)
|
|||
|
JOIN pg_namespace n ON (n.oid = c.relnamespace)
|
|||
|
LEFT JOIN pg_tablespace s ON (s.oid = c.reltablespace)
|
|||
|
WHERE c.relkind = 'v'},
|
|||
|
exclude => 'system',
|
|||
|
},
|
|||
|
table => {
|
|||
|
SQL => q{
|
|||
|
SELECT c.*, nspname||'.'||relname AS name, quote_ident(usename) AS owner,
|
|||
|
quote_ident(relname) AS safename, quote_ident(nspname) AS schema,
|
|||
|
spcname AS tablespace
|
|||
|
FROM pg_class c
|
|||
|
JOIN pg_user u ON (u.usesysid = c.relowner)
|
|||
|
JOIN pg_namespace n ON (n.oid = c.relnamespace)
|
|||
|
LEFT JOIN pg_tablespace s ON (s.oid = c.reltablespace)
|
|||
|
WHERE c.relkind = 'r'},
|
|||
|
exclude => 'system',
|
|||
|
},
|
|||
|
index => {
|
|||
|
SQL => q{
|
|||
|
SELECT c.*, i.*, nspname||'.'||relname AS name, quote_ident(usename) AS owner,
|
|||
|
quote_ident(relname) AS safename, quote_ident(nspname) AS schema,
|
|||
|
spcname AS tablespace, amname,
|
|||
|
pg_get_indexdef(c.oid) AS indexdef
|
|||
|
FROM pg_class c
|
|||
|
JOIN pg_user u ON (u.usesysid = c.relowner)
|
|||
|
JOIN pg_namespace n ON (n.oid = c.relnamespace)
|
|||
|
JOIN pg_index i ON (c.oid = i.indexrelid)
|
|||
|
LEFT JOIN pg_tablespace s ON (s.oid = c.reltablespace)
|
|||
|
LEFT JOIN pg_am a ON (a.oid = c.relam)
|
|||
|
WHERE c.relkind = 'i'},
|
|||
|
exclude => 'system',
|
|||
|
},
|
|||
|
operator => {
|
|||
|
SQL => q{
|
|||
|
SELECT o.*, o.oid, nspname||'.'||o.oprname AS name, quote_ident(o.oprname) AS safename,
|
|||
|
usename AS owner, nspname AS schema,
|
|||
|
t1.typname AS resultname,
|
|||
|
t2.typname AS leftname, t3.typname AS rightname
|
|||
|
FROM pg_operator o
|
|||
|
JOIN pg_user u ON (u.usesysid = o.oprowner)
|
|||
|
JOIN pg_namespace n ON (n.oid = o.oprnamespace)
|
|||
|
JOIN pg_proc p1 ON (p1.oid = o.oprcode)
|
|||
|
JOIN pg_type t1 ON (t1.oid = o.oprresult)
|
|||
|
LEFT JOIN pg_type t2 ON (t2.oid = o.oprleft)
|
|||
|
LEFT JOIN pg_type t3 ON (t3.oid = o.oprright)},
|
|||
|
exclude => 'system',
|
|||
|
},
|
|||
|
trigger => {
|
|||
|
SQL => q{
|
|||
|
SELECT t.*, n1.nspname||'.'||t.tgname AS name, quote_ident(t.tgname) AS safename, quote_ident(usename) AS owner,
|
|||
|
n1.nspname AS tschema, c1.relname AS tname,
|
|||
|
n2.nspname AS cschema, c2.relname AS cname,
|
|||
|
n3.nspname AS procschema, p.proname AS procname
|
|||
|
FROM pg_trigger t
|
|||
|
JOIN pg_class c1 ON (c1.oid = t.tgrelid)
|
|||
|
JOIN pg_user u ON (u.usesysid = c1.relowner)
|
|||
|
JOIN pg_namespace n1 ON (n1.oid = c1.relnamespace)
|
|||
|
JOIN pg_proc p ON (p.oid = t.tgfoid)
|
|||
|
JOIN pg_namespace n3 ON (n3.oid = p.pronamespace)
|
|||
|
LEFT JOIN pg_class c2 ON (c2.oid = t.tgconstrrelid)
|
|||
|
LEFT JOIN pg_namespace n2 ON (n2.oid = c2.relnamespace)
|
|||
|
WHERE t.tgconstrrelid = 0 AND tgname !~ '^pg_'},
|
|||
|
},
|
|||
|
function => {
|
|||
|
SQL => q{
|
|||
|
SELECT p.*, p.oid, nspname||'.'||p.proname AS name, quote_ident(p.proname) AS safename,
|
|||
|
md5(prosrc) AS source_checksum,
|
|||
|
usename AS owner, nspname AS schema
|
|||
|
FROM pg_proc p
|
|||
|
JOIN pg_user u ON (u.usesysid = p.proowner)
|
|||
|
JOIN pg_namespace n ON (n.oid = p.pronamespace)},
|
|||
|
exclude => 'system',
|
|||
|
},
|
|||
|
constraint => {
|
|||
|
SQL => q{
|
|||
|
SELECT c.*, c.oid, n.nspname||'.'||c.conname AS name, quote_ident(c.conname) AS safename,
|
|||
|
n.nspname AS schema, relname AS tname
|
|||
|
FROM pg_constraint c
|
|||
|
JOIN pg_namespace n ON (n.oid = c.connamespace)
|
|||
|
JOIN pg_class r ON (r.oid = c.conrelid)
|
|||
|
JOIN pg_namespace n2 ON (n2.oid = r.relnamespace)},
|
|||
|
exclude => 'system',
|
|||
|
},
|
|||
|
column => {
|
|||
|
SQL => q{
|
|||
|
SELECT a.*, n.nspname||'.'||c.relname||'.'||attname AS name, quote_ident(a.attname) AS safename,
|
|||
|
n.nspname||'.'||c.relname AS tname,
|
|||
|
typname, quote_ident(nspname) AS schema,
|
|||
|
pg_get_expr(d.adbin, a.attrelid, true) AS default
|
|||
|
FROM pg_attribute a
|
|||
|
JOIN pg_type t ON (t.oid = a.atttypid)
|
|||
|
JOIN pg_class c ON (c.oid = a.attrelid AND c.relkind = 'r')
|
|||
|
JOIN pg_namespace n ON (n.oid = c.relnamespace)
|
|||
|
LEFT JOIN pg_attrdef d ON (d.adrelid = a.attrelid AND d.adnum = a.attnum)
|
|||
|
WHERE attnum >= 1
|
|||
|
AND NOT attisdropped},
|
|||
|
postSQL => q{ORDER BY n.nspname, c.relname, a.attnum},
|
|||
|
exclude => 'system',
|
|||
|
},
|
|||
|
);
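## Illustration only: the same_schema code keys into the hash above by object
## type, e.g. the base query for tables is $catalog_info{table}{SQL}, optionally
## followed by a postSQL clause such as the ORDER BY defined for columns.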
|
|||
|
|
|||
|
my $rcfile;
|
|||
|
if (! $opt{'no-check_postgresrc'}) {
|
|||
|
if (-e '.check_postgresrc') {
|
|||
|
$rcfile = '.check_postgresrc';
|
|||
|
}
|
|||
|
elsif (-e "$ENV{HOME}/.check_postgresrc") {
|
|||
|
$rcfile = "$ENV{HOME}/.check_postgresrc";
|
|||
|
}
|
|||
|
elsif (-e '/etc/check_postgresrc') {
|
|||
|
$rcfile = '/etc/check_postgresrc';
|
|||
|
}
|
|||
|
elsif (-e '/usr/local/etc/check_postgresrc') {
|
|||
|
$rcfile = '/usr/local/etc/check_postgresrc';
|
|||
|
}
|
|||
|
}
|
|||
|
## We need a temporary hash so that multi-value options can be overridden on the command line
|
|||
|
my %tempopt;
|
|||
|
if (defined $rcfile) {
|
|||
|
open my $rc, '<', $rcfile or die qq{Could not open "$rcfile": $!\n};
|
|||
|
RCLINE:
|
|||
|
while (<$rc>) {
|
|||
|
next if /^\s*#/;
|
|||
|
next unless /^\s*(\w+)\s*=\s*(.+?)\s*$/o;
|
|||
|
my ($name,$value) = ($1,$2); ## no critic (ProhibitCaptureWithoutTest)
|
|||
|
## Map alternate option spellings to preferred names
|
|||
|
if ($name eq 'dbport' or $name eq 'p' or $name eq 'dbport1' or $name eq 'p1' or $name eq 'port1') {
|
|||
|
$name = 'port';
|
|||
|
}
|
|||
|
elsif ($name eq 'dbhost' or $name eq 'H' or $name eq 'dbhost1' or $name eq 'H1' or $name eq 'host1') {
|
|||
|
$name = 'host';
|
|||
|
}
|
|||
|
elsif ($name eq 'db' or $name eq 'db1' or $name eq 'dbname1') {
|
|||
|
$name = 'dbname';
|
|||
|
}
|
|||
|
elsif ($name eq 'u' or $name eq 'u1' or $name eq 'dbuser1') {
|
|||
|
$name = 'dbuser';
|
|||
|
}
|
|||
|
## Now for all the additional non-1 databases
|
|||
|
elsif ($name =~ /^dbport(\d+)$/o or $name =~ /^p(\d+)$/o) {
|
|||
|
$name = "port$1";
|
|||
|
}
|
|||
|
elsif ($name =~ /^dbhost(\d+)$/o or $name =~ /^H(\d+)$/o) {
|
|||
|
$name = "host$1";
|
|||
|
}
|
|||
|
elsif ($name =~ /^db(\d+)$/o) {
|
|||
|
$name = "dbname$1";
|
|||
|
}
|
|||
|
elsif ($name =~ /^u(\d+)$/o) {
|
|||
|
$name = "dbuser$1";
|
|||
|
}
|
|||
|
|
|||
|
## These options are multiples ('@s')
|
|||
|
for my $arr (qw/include exclude includeuser excludeuser host port
|
|||
|
dbuser dbname dbpass dbservice schema/) {
|
|||
|
next if $name ne $arr and $name ne "${arr}2";
|
|||
|
push @{$tempopt{$name}} => $value;
|
|||
|
## Don't set below as a normal value
|
|||
|
next RCLINE;
|
|||
|
}
|
|||
|
$opt{$name} = $value;
|
|||
|
}
|
|||
|
close $rc or die;
|
|||
|
}
|
|||
|
|
|||
|
die $USAGE if ! @ARGV;
|
|||
|
|
|||
|
GetOptions(
|
|||
|
\%opt,
|
|||
|
'version|V',
|
|||
|
'verbose|v+',
|
|||
|
'vv',
|
|||
|
'help|h',
|
|||
|
'quiet|q',
|
|||
|
'man',
|
|||
|
'output=s',
|
|||
|
'simple',
|
|||
|
'showperf=i',
|
|||
|
'perflimit=i',
|
|||
|
'showtime=i',
|
|||
|
'timeout|t=i',
|
|||
|
'test',
|
|||
|
'symlinks',
|
|||
|
'debugoutput=s',
|
|||
|
'no-check_postgresrc',
|
|||
|
'assume-standby-mode',
|
|||
|
'assume-prod',
|
|||
|
|
|||
|
'action=s',
|
|||
|
'warning=s',
|
|||
|
'critical=s',
|
|||
|
'include=s@',
|
|||
|
'exclude=s@',
|
|||
|
'includeuser=s@',
|
|||
|
'excludeuser=s@',
|
|||
|
|
|||
|
'host|dbhost|H|dbhost1|H1=s@',
|
|||
|
'port|dbport|p|port1|dbport1|p1=s@',
|
|||
|
'dbname|db|dbname1|db1=s@',
|
|||
|
'dbuser|u|dbuser1|u1=s@',
|
|||
|
'dbpass|dbpass1=s@',
|
|||
|
'dbservice|dbservice1=s@',
|
|||
|
|
|||
|
'PSQL=s',
|
|||
|
|
|||
|
'tempdir=s',
|
|||
|
'get_method=s',
|
|||
|
'language=s',
|
|||
|
'mrtg=s', ## used by MRTG checks only
|
|||
|
'logfile=s', ## used by check_logfile only
|
|||
|
'queryname=s', ## used by query_runtime only
|
|||
|
'query=s', ## used by custom_query only
|
|||
|
'valtype=s', ## used by custom_query only
|
|||
|
'reverse', ## used by custom_query only
|
|||
|
'repinfo=s', ## used by replicate_row only
|
|||
|
'noidle', ## used by backends only
|
|||
|
'datadir=s', ## used by checkpoint only
|
|||
|
'schema=s@', ## used by slony_status only
|
|||
|
'filter=s@', ## used by same_schema only
|
|||
|
'suffix=s', ## used by same_schema only
|
|||
|
'replace', ## used by same_schema only
|
|||
|
);
|
|||
|
|
|||
|
die $USAGE if ! keys %opt and ! @ARGV;
|
|||
|
|
|||
|
## Process the args that are not so easy for Getopt::Long
|
|||
|
my @badargs;
|
|||
|
|
|||
|
while (my $arg = pop @ARGV) {
|
|||
|
|
|||
|
## These must be of the form x=y
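## e.g. "--host2=db2.example.com --port2=5433" (placeholder values) ends up
## as $opt{host2} and $opt{port2} via the mappings below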
|
|||
|
if ($arg =~ /^\-?\-?(\w+)\s*=\s*(.+)/o) {
|
|||
|
my ($name,$value) = (lc $1, $2);
|
|||
|
if ($name =~ /^(?:db)?port(\d+)$/o or $name =~ /^p(\d+)$/o) {
|
|||
|
$opt{"port$1"} = $value;
|
|||
|
}
|
|||
|
elsif ($name =~ /^(?:db)?host(\d+)$/o or $name =~ /^H(\d+)$/o) {
|
|||
|
$opt{"host$1"} = $value;
|
|||
|
}
|
|||
|
elsif ($name =~ /^db(?:name)?(\d+)$/o) {
|
|||
|
$opt{"dbname$1"} = $value;
|
|||
|
}
|
|||
|
elsif ($name =~ /^dbuser(\d+)$/o or $name =~ /^u(\d+)$/o) {
|
|||
|
$opt{"dbuser$1"} = $value;
|
|||
|
}
|
|||
|
elsif ($name =~ /^dbpass(\d+)$/o) {
|
|||
|
$opt{"dbpass$1"} = $value;
|
|||
|
}
|
|||
|
elsif ($name =~ /^dbservice(\d+)$/o) {
|
|||
|
$opt{"dbservice$1"} = $value;
|
|||
|
}
|
|||
|
else {
|
|||
|
push @badargs => $arg;
|
|||
|
}
|
|||
|
next;
|
|||
|
}
|
|||
|
push @badargs => $arg;
|
|||
|
}
|
|||
|
|
|||
|
if (@badargs) {
|
|||
|
warn "Invalid arguments:\n";
|
|||
|
for (@badargs) {
|
|||
|
warn " $_\n";
|
|||
|
}
|
|||
|
die $USAGE;
|
|||
|
}
|
|||
|
|
|||
|
if ( $opt{man} ) {
|
|||
|
require Pod::Usage;
|
|||
|
Pod::Usage::pod2usage({-verbose => 2});
|
|||
|
exit;
|
|||
|
}
|
|||
|
|
|||
|
## Put multi-val options from check_postgresrc in place, only if no command-line args!
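## e.g. an "include=..." line in the rc file is honored only when no --include
## was given on the command line; command-line values always win.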
|
|||
|
for my $mv (keys %tempopt) {
|
|||
|
$opt{$mv} ||= delete $tempopt{$mv};
|
|||
|
}
|
|||
|
|
|||
|
our $VERBOSE = $opt{verbose} || 0;
|
|||
|
$VERBOSE = 5 if $opt{vv};
|
|||
|
|
|||
|
our $OUTPUT = lc($opt{output} || '');
|
|||
|
|
|||
|
## Allow the optimization of the get_methods list by an argument
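## e.g. --get_method=wget keeps only the wget entry from @get_methods
## (matched as a case-insensitive prefix against each entry)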
|
|||
|
if ($opt{get_method}) {
|
|||
|
my $found = 0;
|
|||
|
for my $meth (@get_methods) {
|
|||
|
if ($meth =~ /^$opt{get_method}/io) {
|
|||
|
@get_methods = ($meth);
|
|||
|
$found = 1;
|
|||
|
last;
|
|||
|
}
|
|||
|
}
|
|||
|
if (!$found) {
|
|||
|
print "Unknown value for get_method: $opt{get_method}\n";
|
|||
|
print "Valid choices are:\n";
|
|||
|
print (join "\n" => map { s/(\w+).*/$1/; $_ } @get_methods);
|
|||
|
print "\n";
|
|||
|
exit;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Allow the language to be changed by an explicit option
|
|||
|
if ($opt{language}) {
|
|||
|
$lang = substr($opt{language},0,2);
|
|||
|
}
|
|||
|
|
|||
|
## Output the actual string returned by psql in the normal output
|
|||
|
## Argument is 'a' for all, 'w' for warning, 'c' for critical, 'u' for unknown
|
|||
|
## Can be grouped together
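## e.g. --debugoutput=wc includes the raw psql string only for warning and
## critical results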
|
|||
|
our $DEBUGOUTPUT = $opt{debugoutput} || '';
|
|||
|
our $DEBUG_INFO = '?';
|
|||
|
|
|||
|
## If not explicitly given an output, check the current directory,
|
|||
|
## then fall back to the default.
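## e.g. when run from a working directory whose path contains "mrtg" (such as
## /var/lib/mrtg/, an illustrative path), the output mode becomes 'mrtg';
## otherwise, unless --simple was given, $DEFAULT_OUTPUT ('nagios') is used.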
|
|||
|
|
|||
|
if (!$OUTPUT) {
|
|||
|
my $dir = getcwd;
|
|||
|
if ($dir =~ /(nagios|mrtg|simple|cacti)/io) {
|
|||
|
$OUTPUT = lc $1;
|
|||
|
}
|
|||
|
elsif ($opt{simple}) {
|
|||
|
$OUTPUT = 'simple';
|
|||
|
}
|
|||
|
else {
|
|||
|
$OUTPUT = $DEFAULT_OUTPUT;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
## Extract transforms from the output
|
|||
|
$opt{transform} = '';
|
|||
|
if ($OUTPUT =~ /\b(kb|mb|gb|tb|eb)\b/) {
|
|||
|
$opt{transform} = uc $1;
|
|||
|
}
|
|||
|
if ($OUTPUT =~ /(nagios|mrtg|simple|cacti)/io) {
|
|||
|
$OUTPUT = lc $1;
|
|||
|
}
|
|||
|
## Check for a valid output setting
|
|||
|
if ($OUTPUT ne 'nagios' and $OUTPUT ne 'mrtg' and $OUTPUT ne 'simple' and $OUTPUT ne 'cacti') {
|
|||
|
die msgn('opt-output-invalid');
|
|||
|
}
|
|||
|
|
|||
|
our $MRTG = ($OUTPUT eq 'mrtg' or $OUTPUT eq 'simple') ? 1 : 0;
|
|||
|
our (%stats, %statsmsg);
|
|||
|
our $SIMPLE = $OUTPUT eq 'simple' ? 1 : 0;
|
|||
|
|
|||
|
## See if we need to invoke something based on our name
|
|||
|
our $action = $opt{action} || '';
|
|||
|
if ($ME =~ /check_postgres_(\w+)/ and ! defined $opt{action}) {
|
|||
|
$action = $1;
|
|||
|
}
|
|||
|
|
|||
|
$VERBOSE >= 3 and warn Dumper \%opt;
|
|||
|
|
|||
|
if ($opt{version}) {
|
|||
|
print qq{$ME2 version $VERSION\n};
|
|||
|
exit 0;
|
|||
|
}
|
|||
|
|
|||
|
## Quick hash to put normal action information in one place:
|
|||
|
our $action_info = {
|
|||
|
# Name # clusterwide? # helpstring
|
|||
|
archive_ready => [1, 'Check the number of WAL files ready to archive in the pg_xlog/archive_status directory.'],
|
|||
|
autovac_freeze => [1, 'Checks how close databases are to autovacuum_freeze_max_age.'],
|
|||
|
backends => [1, 'Number of connections, compared to max_connections.'],
|
|||
|
bloat => [0, 'Check for table and index bloat.'],
|
|||
|
checkpoint => [1, 'Checks how long since the last checkpoint'],
|
|||
|
cluster_id => [1, 'Checks the Database System Identifier'],
|
|||
|
commitratio => [0, 'Report if the commit ratio of a database is too low.'],
|
|||
|
connection => [0, 'Simple connection check.'],
|
|||
|
custom_query => [0, 'Run a custom query.'],
|
|||
|
database_size => [0, 'Report if a database is too big.'],
|
|||
|
dbstats => [1, 'Returns stats from pg_stat_database: Cacti output only'],
|
|||
|
disabled_triggers => [0, 'Check if any triggers are disabled'],
|
|||
|
disk_space => [1, 'Checks space of local disks Postgres is using.'],
|
|||
|
fsm_pages => [1, 'Checks percentage of pages used in free space map.'],
|
|||
|
fsm_relations => [1, 'Checks percentage of relations used in free space map.'],
|
|||
|
hitratio => [0, 'Report if the hit ratio of a database is too low.'],
|
|||
|
hot_standby_delay => [1, 'Check the replication delay in hot standby setup'],
|
|||
|
index_size => [0, 'Checks the size of indexes only.'],
|
|||
|
table_size => [0, 'Checks the size of tables only.'],
|
|||
|
relation_size => [0, 'Checks the size of tables and indexes.'],
|
|||
|
last_analyze => [0, 'Check the maximum time in seconds since any one table has been analyzed.'],
|
|||
|
last_vacuum => [0, 'Check the maximum time in seconds since any one table has been vacuumed.'],
|
|||
|
last_autoanalyze => [0, 'Check the maximum time in seconds since any one table has been autoanalyzed.'],
|
|||
|
last_autovacuum => [0, 'Check the maximum time in seconds since any one table has been autovacuumed.'],
|
|||
|
listener => [0, 'Checks for specific listeners.'],
|
|||
|
locks => [0, 'Checks the number of locks.'],
|
|||
|
logfile => [1, 'Checks that the logfile is being written to correctly.'],
|
|||
|
new_version_bc => [0, 'Checks if a newer version of Bucardo is available.'],
|
|||
|
new_version_box => [0, 'Checks if a newer version of boxinfo is available.'],
|
|||
|
new_version_cp => [0, 'Checks if a newer version of check_postgres.pl is available.'],
|
|||
|
new_version_pg => [0, 'Checks if a newer version of Postgres is available.'],
|
|||
|
new_version_tnm => [0, 'Checks if a newer version of tail_n_mail is available.'],
|
|||
|
pgb_pool_cl_active => [1, 'Check the number of active clients in each pgbouncer pool.'],
|
|||
|
pgb_pool_cl_waiting => [1, 'Check the number of waiting clients in each pgbouncer pool.'],
|
|||
|
pgb_pool_sv_active => [1, 'Check the number of active server connections in each pgbouncer pool.'],
|
|||
|
pgb_pool_sv_idle => [1, 'Check the number of idle server connections in each pgbouncer pool.'],
|
|||
|
pgb_pool_sv_used => [1, 'Check the number of used server connections in each pgbouncer pool.'],
|
|||
|
pgb_pool_sv_tested => [1, 'Check the number of tested server connections in each pgbouncer pool.'],
|
|||
|
pgb_pool_sv_login => [1, 'Check the number of login server connections in each pgbouncer pool.'],
|
|||
|
pgb_pool_maxwait => [1, 'Check the current maximum wait time for client connections in pgbouncer pools.'],
|
|||
|
pgbouncer_backends => [0, 'Check how many clients are connected to pgbouncer compared to max_client_conn.'],
|
|||
|
pgbouncer_checksum => [0, 'Check that no pgbouncer settings have changed since the last check.'],
|
|||
|
prepared_txns => [1, 'Checks number and age of prepared transactions.'],
|
|||
|
query_runtime => [0, 'Check how long a specific query takes to run.'],
|
|||
|
query_time => [1, 'Checks the maximum running time of current queries.'],
|
|||
|
replicate_row => [0, 'Verify a simple update gets replicated to another server.'],
|
|||
|
same_schema => [0, 'Verify that two databases have the exact same tables, columns, etc.'],
|
|||
|
sequence => [0, 'Checks remaining calls left in sequences.'],
|
|||
|
settings_checksum => [0, 'Check that no settings have changed since the last check.'],
|
|||
|
slony_status => [1, 'Ensure Slony is up to date via sl_status.'],
|
|||
|
timesync => [0, 'Compare database time to local system time.'],
|
|||
|
txn_idle => [1, 'Checks the maximum "idle in transaction" time.'],
|
|||
|
txn_time => [1, 'Checks the maximum open transaction time.'],
|
|||
|
txn_wraparound => [1, 'See how close databases are getting to transaction ID wraparound.'],
|
|||
|
version => [1, 'Check for proper Postgres version.'],
|
|||
|
wal_files => [1, 'Check the number of WAL files in the pg_xlog directory'],
|
|||
|
};
|
|||
|
|
|||
|
## XXX Need to i18n the above
|
|||
|
our $action_usage = '';
|
|||
|
our $longname = 1;
|
|||
|
for (keys %$action_info) {
|
|||
|
$longname = length($_) if length($_) > $longname;
|
|||
|
}
|
|||
|
for (sort keys %$action_info) {
|
|||
|
$action_usage .= sprintf " %-*s - %s\n", 2+$longname, $_, $action_info->{$_}[1];
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
if ($opt{help}) {
|
|||
|
print qq{Usage: $ME2 <options>
|
|||
|
Run various tests against one or more Postgres databases.
|
|||
|
Returns with an exit code of 0 (success), 1 (warning), 2 (critical), or 3 (unknown)
|
|||
|
This is version $VERSION.
|
|||
|
|
|||
|
Common connection options:
|
|||
|
-H, --host=NAME hostname(s) to connect to; defaults to none (Unix socket)
|
|||
|
-p, --port=NUM port(s) to connect to; defaults to $opt{defaultport}.
|
|||
|
-db, --dbname=NAME database name(s) to connect to; defaults to 'postgres' or 'template1'
|
|||
|
-u --dbuser=NAME database user(s) to connect as; defaults to '$opt{defaultuser}'
|
|||
|
--dbpass=PASS database password(s); use a .pgpass file instead when possible
|
|||
|
--dbservice=NAME service name to use inside of pg_service.conf
|
|||
|
|
|||
|
Connection options can be grouped: --host=a,b --host=c --port=1234 --port=3344
|
|||
|
would connect to a-1234, b-1234, and c-3344
|
|||
|
|
|||
|
Limit options:
|
|||
|
-w value, --warning=value the warning threshold, range depends on the action
|
|||
|
-c value, --critical=value the critical threshold, range depends on the action
|
|||
|
--include=name(s) items to specifically include (e.g. tables), depends on the action
|
|||
|
--exclude=name(s) items to specifically exclude (e.g. tables), depends on the action
|
|||
|
--includeuser=include objects owned by certain users
|
|||
|
--excludeuser=exclude objects owned by certain users
|
|||
|
|
|||
|
Other options:
--assume-standby-mode assume that the server is in continuous WAL recovery mode
--assume-prod assume that the server is in production mode
--PSQL=FILE location of the psql executable; avoid using if possible
|
|||
|
-v, --verbose verbosity level; can be used more than once to increase the level
|
|||
|
-h, --help display this help information
|
|||
|
--man display the full manual
|
|||
|
-t X, --timeout=X how long in seconds before we time out. Defaults to 30 seconds.
|
|||
|
--symlinks create named symlinks to the main program for each action
|
|||
|
|
|||
|
Actions:
|
|||
|
Which test to run is determined by the --action option, or by the name of the program
|
|||
|
$action_usage
|
|||
|
|
|||
|
For a complete list of options and full documentation, view the manual.
|
|||
|
|
|||
|
$ME --man
|
|||
|
|
|||
|
Or visit: http://bucardo.org/check_postgres/
|
|||
|
|
|||
|
|
|||
|
};
|
|||
|
exit 0;
|
|||
|
}
|
|||
|
|
|||
|
build_symlinks() if $opt{symlinks};
|
|||
|
|
|||
|
$action =~ /\w/ or die $USAGE;
|
|||
|
|
|||
|
## Be nice and figure out what they meant
|
|||
|
$action =~ s/\-/_/g;
|
|||
|
$action = lc $action;
|
|||
|
|
|||
|
## Build symlinked copies of this file
|
|||
|
build_symlinks() if $action =~ /build_symlinks/; ## Does not return, may be 'build_symlinks_force'
|
|||
|
|
|||
|
## Die if Time::HiRes is needed but not found
|
|||
|
if ($opt{showtime}) {
|
|||
|
eval {
|
|||
|
require Time::HiRes;
|
|||
|
import Time::HiRes qw/gettimeofday tv_interval sleep/;
|
|||
|
};
|
|||
|
if ($@) {
|
|||
|
die msg('no-time-hires');
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Check the current database mode
|
|||
|
our $STANDBY = 0;
|
|||
|
our $MASTER = 0;
|
|||
|
make_sure_standby_mode() if $opt{'assume-standby-mode'};
|
|||
|
make_sure_prod() if $opt{'assume-prod'};
|
|||
|
|
|||
|
## We don't (usually) want to die, but want a graceful Nagios-like exit instead
|
|||
|
sub ndie {
|
|||
|
eval { File::Temp::cleanup(); };
|
|||
|
my $msg = shift;
|
|||
|
chomp $msg;
|
|||
|
print "ERROR: $msg\n";
|
|||
|
exit 3;
|
|||
|
}
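## (Illustrative usage) ndie is used for abnormal exits, e.g.:
##   ndie msg('opt-psql-noexec', $PSQL);
## prints "ERROR: ..." to stdout and exits with 3, which Nagios treats as UNKNOWN.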
|
|||
|
|
|||
|
sub msg { ## no critic
|
|||
|
|
|||
|
my $name = shift || '?';
|
|||
|
|
|||
|
my $msg = '';
|
|||
|
|
|||
|
if (exists $msg{$lang}{$name}) {
|
|||
|
$msg = $msg{$lang}{$name};
|
|||
|
}
|
|||
|
elsif (exists $msg{'en'}{$name}) {
|
|||
|
$msg = $msg{'en'}{$name};
|
|||
|
}
|
|||
|
else {
|
|||
|
## Allow for non-matches in certain rare cases
|
|||
|
return '' if $opt{nomsgok};
|
|||
|
my $line = (caller)[2];
|
|||
|
die qq{Invalid message "$name" from line $line\n};
|
|||
|
}
|
|||
|
|
|||
|
my $x=1;
|
|||
|
{
|
|||
|
my $val = $_[$x-1];
|
|||
|
$val = '?' if ! defined $val;
|
|||
|
last unless $msg =~ s/\$$x/$val/g;
|
|||
|
$x++;
|
|||
|
redo;
|
|||
|
}
|
|||
|
return $msg;
|
|||
|
|
|||
|
} ## end of msg
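## (Illustrative) Message templates use the literal strings $1, $2, ... as placeholders,
## so msg('runcommand-timeout', 30) substitutes 30 wherever "$1" appears in that template.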
|
|||
|
|
|||
|
sub msgn { ## no critic
|
|||
|
return msg(@_) . "\n";
|
|||
|
}
|
|||
|
|
|||
|
sub msg_en {
|
|||
|
|
|||
|
my $name = shift || '?';
|
|||
|
|
|||
|
return $msg{'en'}{$name};
|
|||
|
|
|||
|
} ## end of msg_en
|
|||
|
|
|||
|
## Everything from here on out needs psql, so find and verify a working version:
|
|||
|
if ($NO_PSQL_OPTION) {
|
|||
|
delete $opt{PSQL} and ndie msg('opt-psql-restrict');
|
|||
|
}
|
|||
|
|
|||
|
if (! defined $PSQL or ! length $PSQL) {
|
|||
|
if (exists $opt{PSQL}) {
|
|||
|
$PSQL = $opt{PSQL};
|
|||
|
$PSQL =~ m{^/[\w\d\/]*psql$} or ndie msg('opt-psql-badpath');
|
|||
|
-e $PSQL or ndie msg('opt-psql-noexist', $PSQL);
|
|||
|
}
|
|||
|
else {
|
|||
|
my $psql = $ENV{PGBINDIR} ? "$ENV{PGBINDIR}/psql" : 'psql';
|
|||
|
chomp($PSQL = qx{which $psql});
|
|||
|
$PSQL or ndie msg('opt-psql-nofind');
|
|||
|
}
|
|||
|
}
|
|||
|
-x $PSQL or ndie msg('opt-psql-noexec', $PSQL);
|
|||
|
$res = qx{$PSQL --version};
|
|||
|
$res =~ /psql\D+(\d+\.\d+)/ or ndie msg('opt-psql-nover');
|
|||
|
our $psql_version = $1;
|
|||
|
|
|||
|
$VERBOSE >= 2 and warn qq{psql=$PSQL version=$psql_version\n};
|
|||
|
|
|||
|
$opt{defaultdb} = $psql_version >= 8.0 ? 'postgres' : 'template1';
|
|||
|
$opt{defaultdb} = 'pgbouncer' if $action =~ /^pgb/;
|
|||
|
|
|||
|
## Create the list of databases we are going to connect to
|
|||
|
my @targetdb = setup_target_databases();
|
|||
|
|
|||
|
sub add_response {
|
|||
|
|
|||
|
my ($type,$msg) = @_;
|
|||
|
|
|||
|
$db->{host} ||= '';
|
|||
|
|
|||
|
if ($STANDBY) {
|
|||
|
$action_info->{$action}[0] = 1;
|
|||
|
}
|
|||
|
|
|||
|
if ($nohost) {
|
|||
|
push @{$type->{''}} => [$msg, length $nohost > 1 ? $nohost : ''];
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
my $dbservice = $db->{dbservice};
|
|||
|
my $dbname = defined $db->{dbname} ? qq{DB "$db->{dbname}"} : '';
|
|||
|
my $dbhost = (!$db->{host} or $db->{host} eq '<none>') ? '' : qq{ (host:$db->{host})};
|
|||
|
my $dbport = defined $db->{port} ? ($db->{port} eq $opt{defaultport} ? '' : qq{ (port=$db->{port}) }) : '';
|
|||
|
|
|||
|
## Same_schema gets some different output
|
|||
|
my $same_schema_header = '';
|
|||
|
if ($action eq 'same_schema') {
|
|||
|
|
|||
|
## Pretty display of what exactly those numbers mean!
|
|||
|
my $number = 0;
|
|||
|
my $historical = 0;
|
|||
|
for my $row (@targetdb) {
|
|||
|
$number++;
|
|||
|
if (exists $row->{filename}) {
|
|||
|
$historical = 1;
|
|||
|
$same_schema_header .= sprintf "\nDB %s: File=%s\nDB %s: %s: %s %s: %s",
|
|||
|
$number,
|
|||
|
$row->{filename},
|
|||
|
$number,
|
|||
|
'Creation date',
|
|||
|
$row->{ctime},
|
|||
|
'CP version',
|
|||
|
$row->{cversion};
|
|||
|
}
|
|||
|
$same_schema_header .= sprintf "\nDB %s: %s%s%s%s%s",
|
|||
|
$number,
|
|||
|
defined $row->{dbservice} ? qq{dbservice=$row->{dbservice} } : '',
|
|||
|
defined $row->{port} ? qq{port=$row->{port} } : '',
|
|||
|
defined $row->{host} ? qq{host=$row->{host} } : '',
|
|||
|
defined $row->{dbname} ? qq{dbname=$row->{dbname} } : '',
|
|||
|
defined $row->{dbuser} ? qq{user=$row->{dbuser} } : '';
|
|||
|
$same_schema_header .= "\nDB $number: PG version: $row->{pgversion}";
|
|||
|
$same_schema_header .= "\nDB $number: Total objects: $row->{objects}";
|
|||
|
}
|
|||
|
|
|||
|
## Databases
|
|||
|
$number = 1;
|
|||
|
my %dlist = map { ($_->{dbname} || ''), $number++; } @targetdb;
|
|||
|
if (keys %dlist > 1 and ! $historical) {
|
|||
|
my $dblist = join ',' => sort { $dlist{$a} <=> $dlist{$b} } keys %dlist;
|
|||
|
$dbname = qq{ (databases:$dblist)};
|
|||
|
}
|
|||
|
## Hosts
|
|||
|
$number = 1;
|
|||
|
my %hostlist = map { ($_->{host} || ''), $number++; } @targetdb;
|
|||
|
if (keys %hostlist > 1 and ! $historical) {
|
|||
|
my $dblist = join ',' => sort { $hostlist{$a} <=> $hostlist{$b} } keys %hostlist;
|
|||
|
$dbhost = qq{ (hosts:$dblist)};
|
|||
|
}
|
|||
|
## Ports
|
|||
|
$number = 1;
|
|||
|
my %portlist = map { ($_->{port} || ''), $number++; } @targetdb;
|
|||
|
if (keys %portlist > 1 and ! $historical) {
|
|||
|
my $dblist = join ',' => sort { $portlist{$a} <=> $portlist{$b} } keys %portlist;
|
|||
|
$dbport = qq{ (ports:$dblist)};
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
my $header = sprintf q{%s%s%s%s},
|
|||
|
($action_info->{$action}[0] ? '' : (defined $dbservice and length $dbservice)) ?
|
|||
|
qq{service=$dbservice} : $dbname,
|
|||
|
(defined $db->{showschema} ? qq{ schema:$db->{showschema} } : ''),
|
|||
|
$dbhost,
|
|||
|
$dbport;
|
|||
|
$header =~ s/\s+$//;
|
|||
|
$header =~ s/^ //;
|
|||
|
my $perf = ($opt{showtime} and $db->{totaltime} and $action ne 'bloat') ? "time=$db->{totaltime}s" : '';
|
|||
|
if ($db->{perf}) {
|
|||
|
$db->{perf} =~ s/^ +//;
|
|||
|
if (length $same_schema_header) {
|
|||
|
$db->{perf} =~ s/^\n//;
|
|||
|
$db->{perf} = "$same_schema_header\n$db->{perf}";
|
|||
|
}
|
|||
|
$perf .= sprintf '%s%s', length($perf) ? ' ' : '', $db->{perf};
|
|||
|
}
|
|||
|
|
|||
|
## Strip trailing semicolons as allowed by the Nagios spec
|
|||
|
## But not for same_schema, where we might have (for example) a view def
|
|||
|
if ($action ne 'same_schema') {
|
|||
|
$perf =~ s/; / /;
|
|||
|
$perf =~ s/;$//;
|
|||
|
}
|
|||
|
|
|||
|
push @{$type->{$header}} => [$msg,$perf];
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of add_response
|
|||
|
|
|||
|
|
|||
|
sub add_unknown {
|
|||
|
my $msg = shift || $db->{error};
|
|||
|
$msg =~ s/[\r\n]\s*/\\n /g;
|
|||
|
$msg =~ s/\|/<PIPE>/g if $opt{showperf};
|
|||
|
add_response \%unknown, $msg;
|
|||
|
}
|
|||
|
sub add_critical {
|
|||
|
add_response \%critical, shift;
|
|||
|
}
|
|||
|
sub add_warning {
|
|||
|
add_response \%warning, shift;
|
|||
|
}
|
|||
|
sub add_ok {
|
|||
|
add_response \%ok, shift;
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
sub do_mrtg {
|
|||
|
## Hashref of info to pass out for MRTG or stat
|
|||
|
my $arg = shift;
|
|||
|
my $one = $arg->{one} || 0;
|
|||
|
my $two = $arg->{two} || 0;
|
|||
|
if ($SIMPLE) {
|
|||
|
$one = $two if (length $two and $two > $one);
|
|||
|
if ($opt{transform} eq 'KB' and $one =~ /^\d+$/) {
|
|||
|
$one = int $one/(1024);
|
|||
|
}
|
|||
|
if ($opt{transform} eq 'MB' and $one =~ /^\d+$/) {
|
|||
|
$one = int $one/(1024*1024);
|
|||
|
}
|
|||
|
elsif ($opt{transform} eq 'GB' and $one =~ /^\d+$/) {
|
|||
|
$one = int $one/(1024*1024*1024);
|
|||
|
}
|
|||
|
elsif ($opt{transform} eq 'TB' and $one =~ /^\d+$/) {
|
|||
|
$one = int $one/(1024*1024*1024*1024);
|
|||
|
}
|
|||
|
elsif ($opt{transform} eq 'EB' and $one =~ /^\d+$/) {
|
|||
|
$one = int $one/(1024*1024*1024*1024*1024);
|
|||
|
}
|
|||
|
print "$one\n";
|
|||
|
}
|
|||
|
else {
|
|||
|
my $uptime = $arg->{uptime} || '';
|
|||
|
my $message = $arg->{msg} || '';
|
|||
|
print "$one\n$two\n$uptime\n$message\n";
|
|||
|
}
|
|||
|
exit 0;
|
|||
|
}
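## (Illustrative) In mrtg mode this prints four lines (value one, value two, uptime, message);
## e.g. do_mrtg({one => 42, msg => 'DB: foo'}) prints "42", "0", an empty uptime line, and "DB: foo".
## In simple mode only the (possibly KB/MB/GB-transformed) larger value is printed.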
|
|||
|
|
|||
|
|
|||
|
sub bad_mrtg {
|
|||
|
my $msg = shift;
|
|||
|
$ERROR and ndie $ERROR;
|
|||
|
warn msgn('mrtg-fail', $action, $msg);
|
|||
|
exit 3;
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
sub do_mrtg_stats {
|
|||
|
|
|||
|
## Show the two highest items for mrtg stats hash
|
|||
|
|
|||
|
my $msg = shift;
|
|||
|
defined $msg or ndie('unknown-error');
|
|||
|
|
|||
|
keys %stats or bad_mrtg($msg);
|
|||
|
my ($one,$two) = ('','');
|
|||
|
for (sort { $stats{$b} <=> $stats{$a} } keys %stats) {
|
|||
|
if ($one eq '') {
|
|||
|
$one = $stats{$_};
|
|||
|
$msg = exists $statsmsg{$_} ? $statsmsg{$_} : "DB: $_";
|
|||
|
next;
|
|||
|
}
|
|||
|
$two = $stats{$_};
|
|||
|
last;
|
|||
|
}
|
|||
|
do_mrtg({one => $one, two => $two, msg => $msg});
|
|||
|
}
|
|||
|
|
|||
|
sub make_sure_mode_is {
|
|||
|
|
|||
|
## Requires $ENV{PGDATA} or --datadir
|
|||
|
|
|||
|
$db->{host} = '<none>';
|
|||
|
|
|||
|
## Run pg_controldata, grab the mode
|
|||
|
$res = open_controldata();
|
|||
|
|
|||
|
my $regex = msg('checkmode-state');
|
|||
|
if ($res !~ /$regex\s*(.+)/) { ## no critic (ProhibitUnusedCapture)
|
|||
|
## Just in case, check the English one as well
|
|||
|
$regex = msg_en('checkmode-state');
|
|||
|
if ($res !~ /$regex\s*(.+)/) {
|
|||
|
ndie msg('checkpoint-noregex');
|
|||
|
}
|
|||
|
}
|
|||
|
my $last = $1;
|
|||
|
|
|||
|
return $last;
|
|||
|
|
|||
|
}
|
|||
|
|
|||
|
sub make_sure_standby_mode {
|
|||
|
|
|||
|
## Checks if database in standby mode
|
|||
|
## Requires $ENV{PGDATA} or --datadir
|
|||
|
|
|||
|
my $last = make_sure_mode_is();
|
|||
|
|
|||
|
my $regex = msg('checkmode-recovery');
|
|||
|
if ($last =~ /$regex/) {
|
|||
|
$STANDBY = 1;
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of make_sure_standby_mode
|
|||
|
|
|||
|
sub make_sure_prod {
|
|||
|
|
|||
|
## Checks if database in production mode
|
|||
|
## Requires $ENV{PGDATA} or --datadir
|
|||
|
|
|||
|
my $last = make_sure_mode_is();
|
|||
|
|
|||
|
my $regex = msg('checkmode-prod');
|
|||
|
if ($last =~ /$regex/) {
|
|||
|
$MASTER = 1;
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of make_sure_prod
|
|||
|
|
|||
|
sub finishup {
|
|||
|
|
|||
|
## Final output
|
|||
|
## These are meant to be compact and terse: sometimes messages go to pagers
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
## Try hard to ferret out a message in case we short-circuited here
|
|||
|
my $msg = [[]];
|
|||
|
if (keys %critical) {
|
|||
|
($msg) = values %critical;
|
|||
|
}
|
|||
|
elsif (keys %warning) {
|
|||
|
($msg) = values %warning;
|
|||
|
}
|
|||
|
elsif (keys %ok) {
|
|||
|
($msg) = values %ok;
|
|||
|
}
|
|||
|
elsif (keys %unknown) {
|
|||
|
($msg) = values %unknown;
|
|||
|
}
|
|||
|
do_mrtg_stats($msg->[0][0]);
|
|||
|
}
|
|||
|
|
|||
|
$action =~ s/^\s*(\S+)\s*$/$1/;
|
|||
|
my $service = sprintf "%s$action", $FANCYNAME ? 'postgres_' : '';
|
|||
|
if (keys %critical or keys %warning or keys %ok or keys %unknown) {
|
|||
|
## If in quiet mode, print nothing if all is ok
|
|||
|
if ($opt{quiet} and ! keys %critical and ! keys %warning and ! keys %unknown) {
|
|||
|
}
|
|||
|
else {
|
|||
|
printf '%s ', $YELLNAME ? uc $service : $service;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
sub dumpresult {
|
|||
|
my ($type,$info) = @_;
|
|||
|
my $SEP = ' * ';
|
|||
|
## Are we showing DEBUG_INFO?
|
|||
|
my $showdebug = 0;
|
|||
|
if ($DEBUGOUTPUT) {
|
|||
|
$showdebug = 1 if $DEBUGOUTPUT =~ /a/io
|
|||
|
or ($DEBUGOUTPUT =~ /c/io and $type eq 'c')
|
|||
|
or ($DEBUGOUTPUT =~ /w/io and $type eq 'w')
|
|||
|
or ($DEBUGOUTPUT =~ /o/io and $type eq 'o')
|
|||
|
or ($DEBUGOUTPUT =~ /u/io and $type eq 'u');
|
|||
|
}
|
|||
|
for (sort keys %$info) {
|
|||
|
printf '%s %s%s ',
|
|||
|
$_,
|
|||
|
$showdebug ? "[DEBUG: $DEBUG_INFO] " : '',
|
|||
|
join $SEP => map { $_->[0] } @{$info->{$_}};
|
|||
|
}
|
|||
|
if ($opt{showperf}) {
|
|||
|
my $pmsg = '';
|
|||
|
for (sort keys %$info) {
|
|||
|
my $m = sprintf '%s ', join ' ' => map { $_->[1] } @{$info->{$_}};
|
|||
|
$pmsg .= $m;
|
|||
|
}
|
|||
|
$pmsg =~ s/^\s+//;
|
|||
|
$pmsg and print "| $pmsg";
|
|||
|
}
|
|||
|
print "\n";
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
}
|
|||
|
|
|||
|
if (keys %critical) {
|
|||
|
print 'CRITICAL: ';
|
|||
|
dumpresult(c => \%critical);
|
|||
|
exit 2;
|
|||
|
}
|
|||
|
if (keys %warning) {
|
|||
|
print 'WARNING: ';
|
|||
|
dumpresult(w => \%warning);
|
|||
|
exit 1;
|
|||
|
}
|
|||
|
if (keys %ok) {
|
|||
|
## We print nothing if in quiet mode
|
|||
|
if (! $opt{quiet}) {
|
|||
|
print 'OK: ';
|
|||
|
dumpresult(o => \%ok);
|
|||
|
}
|
|||
|
exit 0;
|
|||
|
}
|
|||
|
if (keys %unknown) {
|
|||
|
print 'UNKNOWN: ';
|
|||
|
dumpresult(u => \%unknown);
|
|||
|
exit 3;
|
|||
|
}
|
|||
|
|
|||
|
die $USAGE;
|
|||
|
|
|||
|
} ## end of finishup
|
|||
|
|
|||
|
|
|||
|
## For options that take a size, e.g. --critical="10 GB"
|
|||
|
our $sizere = qr{^\s*(\d+\.?\d?)\s*([bkmgtepz])?\w*$}i; ## Don't care about the rest of the string
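## e.g. "10 GB" captures (10, 'G') and "1.5 kilobytes" captures (1.5, 'k')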
|
|||
|
|
|||
|
## For options that take a time, e.g. --critical="10 minutes". Fractions are allowed.
|
|||
|
our $timere = qr{^\s*(\d+(?:\.\d+)?)\s*(\w*)\s*$}i;
|
|||
|
|
|||
|
## For options that must be specified in seconds
|
|||
|
our $timesecre = qr{^\s*(\d+)\s*(?:s(?:econd|ec)?)?s?\s*$};
|
|||
|
|
|||
|
## For simple checksums:
|
|||
|
our $checksumre = qr{^[a-f0-9]{32}$};
|
|||
|
|
|||
|
## If in test mode, verify that we can run each requested action
|
|||
|
our %testaction = (
|
|||
|
autovac_freeze => 'VERSION: 8.2',
|
|||
|
last_vacuum => 'ON: stats_row_level(<8.3) VERSION: 8.2',
|
|||
|
last_analyze => 'ON: stats_row_level(<8.3) VERSION: 8.2',
|
|||
|
last_autovacuum => 'ON: stats_row_level(<8.3) VERSION: 8.2',
|
|||
|
last_autoanalyze => 'ON: stats_row_level(<8.3) VERSION: 8.2',
|
|||
|
prepared_txns => 'VERSION: 8.1',
|
|||
|
database_size => 'VERSION: 8.1',
|
|||
|
disabled_triggers => 'VERSION: 8.1',
|
|||
|
relation_size => 'VERSION: 8.1',
|
|||
|
sequence => 'VERSION: 8.1',
|
|||
|
table_size => 'VERSION: 8.1',
|
|||
|
index_size => 'VERSION: 8.1',
|
|||
|
query_time => 'VERSION: 8.1',
|
|||
|
txn_idle => 'VERSION: 8.3',
|
|||
|
txn_time => 'VERSION: 8.3',
|
|||
|
wal_files => 'VERSION: 8.1',
|
|||
|
archive_ready => 'VERSION: 8.1',
|
|||
|
fsm_pages => 'VERSION: 8.2 MAX: 8.3',
|
|||
|
fsm_relations => 'VERSION: 8.2 MAX: 8.3',
|
|||
|
hot_standby_delay => 'VERSION: 9.0',
|
|||
|
listener => 'MAX: 8.4',
|
|||
|
);
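## (Illustrative) This format is consumed by the --test block below and by verify_version():
## 'ON: stats_row_level(<8.3) VERSION: 8.2' means the action needs server 8.2 or newer and,
## on servers older than 8.3, requires the stats_row_level setting to be on;
## 'MAX: 8.4' marks actions that only apply to servers up to version 8.4.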
|
|||
|
if ($opt{test}) {
|
|||
|
print msgn('testmode-start');
|
|||
|
my $info = run_command('SELECT name, setting FROM pg_settings');
|
|||
|
my %set; ## port, host, name, user
|
|||
|
for my $db (@{$info->{db}}) {
|
|||
|
if (exists $db->{fail}) {
|
|||
|
(my $err = $db->{error}) =~ s/\s*\n\s*/ \| /g;
|
|||
|
print msgn('testmode-fail', $db->{pname}, $err);
|
|||
|
next;
|
|||
|
}
|
|||
|
print msgn('testmode-ok', $db->{pname});
|
|||
|
for (@{ $db->{slurp} }) {
|
|||
|
$set{$_->{name}} = $_->{setting};
|
|||
|
}
|
|||
|
}
|
|||
|
for my $ac (split /\s+/ => $action) {
|
|||
|
my $limit = $testaction{lc $ac};
|
|||
|
next if ! defined $limit;
|
|||
|
|
|||
|
if ($limit =~ /VERSION: ((\d+)\.(\d+))/) {
|
|||
|
my ($rver,$rmaj,$rmin) = ($1,$2,$3);
|
|||
|
for my $db (@{$info->{db}}) {
|
|||
|
next unless exists $db->{ok};
|
|||
|
if ($set{server_version} !~ /((\d+)\.(\d+))/) {
|
|||
|
print msgn('testmode-nover', $db->{pname});
|
|||
|
next;
|
|||
|
}
|
|||
|
my ($sver,$smaj,$smin) = ($1,$2,$3);
|
|||
|
if ($smaj < $rmaj or ($smaj==$rmaj and $smin < $rmin)) {
|
|||
|
print msgn('testmode-norun', $ac, $db->{pname}, $rver, $sver);
|
|||
|
}
|
|||
|
$db->{version} = $sver;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if ($limit =~ /MAX: ((\d+)\.(\d+))/) {
|
|||
|
my ($rver,$rmaj,$rmin) = ($1,$2,$3);
|
|||
|
for my $db (@{$info->{db}}) {
|
|||
|
next unless exists $db->{ok};
|
|||
|
if ($set{server_version} !~ /((\d+)\.(\d+))/) {
|
|||
|
print msgn('testmode-nover', $db->{pname});
|
|||
|
next;
|
|||
|
}
|
|||
|
my ($sver,$smaj,$smin) = ($1,$2,$3);
|
|||
|
if ($smaj > $rmaj or ($smaj==$rmaj and $smin > $rmin)) {
|
|||
|
print msgn('testmode-norun', $ac, $db->{pname}, $rver, $sver);
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
while ($limit =~ /\bON: (\w+)(?:\(([<>=])(\d+\.\d+)\))?/g) {
|
|||
|
my ($setting,$op,$ver) = ($1,$2||'',$3||0);
|
|||
|
for my $db (@{$info->{db}}) {
|
|||
|
next unless exists $db->{ok};
|
|||
|
if ($ver) {
|
|||
|
next if $op eq '<' and $db->{version} >= $ver;
|
|||
|
next if $op eq '>' and $db->{version} <= $ver;
|
|||
|
next if $op eq '=' and $db->{version} != $ver;
|
|||
|
}
|
|||
|
my $val = $set{$setting};
|
|||
|
if ($val ne 'on') {
|
|||
|
print msgn('testmode-noset', $ac, $db->{pname}, $setting);
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
print msgn('testmode-end');
|
|||
|
exit 0;
|
|||
|
}
|
|||
|
|
|||
|
## Expand the list of included/excluded users into a standard format
|
|||
|
our $USERWHERECLAUSE = '';
|
|||
|
if ($opt{includeuser}) {
|
|||
|
my %userlist;
|
|||
|
for my $user (@{$opt{includeuser}}) {
|
|||
|
for my $u2 (split /,/ => $user) {
|
|||
|
$userlist{$u2}++;
|
|||
|
}
|
|||
|
}
|
|||
|
my $safename;
|
|||
|
if (1 == keys %userlist) {
|
|||
|
($safename = each %userlist) =~ s/'/''/g;
|
|||
|
$USERWHERECLAUSE = " AND usename = '$safename'";
|
|||
|
}
|
|||
|
else {
|
|||
|
$USERWHERECLAUSE = ' AND usename IN (';
|
|||
|
for my $user (sort keys %userlist) {
|
|||
|
($safename = $user) =~ s/'/''/g;
|
|||
|
$USERWHERECLAUSE .= "'$safename',";
|
|||
|
}
|
|||
|
chop $USERWHERECLAUSE;
|
|||
|
$USERWHERECLAUSE .= ')';
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ($opt{excludeuser}) {
|
|||
|
my %userlist;
|
|||
|
for my $user (@{$opt{excludeuser}}) {
|
|||
|
for my $u2 (split /,/ => $user) {
|
|||
|
$userlist{$u2}++;
|
|||
|
}
|
|||
|
}
|
|||
|
my $safename;
|
|||
|
if (1 == keys %userlist) {
|
|||
|
($safename = each %userlist) =~ s/'/''/g;
|
|||
|
$USERWHERECLAUSE = " AND usename <> '$safename'";
|
|||
|
}
|
|||
|
else {
|
|||
|
$USERWHERECLAUSE = ' AND usename NOT IN (';
|
|||
|
for my $user (sort keys %userlist) {
|
|||
|
($safename = $user) =~ s/'/''/g;
|
|||
|
$USERWHERECLAUSE .= "'$safename',";
|
|||
|
}
|
|||
|
chop $USERWHERECLAUSE;
|
|||
|
$USERWHERECLAUSE .= ')';
|
|||
|
}
|
|||
|
}
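## (Illustrative) e.g. --includeuser=alice,bob builds
##   " AND usename IN ('alice','bob')"
## while --excludeuser=postgres builds " AND usename <> 'postgres'".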
|
|||
|
|
|||
|
## Check number of connections, compare to max_connections
|
|||
|
check_backends() if $action eq 'backends';
|
|||
|
|
|||
|
## Table and index bloat
|
|||
|
check_bloat() if $action eq 'bloat';
|
|||
|
|
|||
|
## Simple connection, warning or critical options
|
|||
|
check_connection() if $action eq 'connection';
|
|||
|
|
|||
|
## Check the commitratio of one or more databases
|
|||
|
check_commitratio() if $action eq 'commitratio';
|
|||
|
|
|||
|
## Check the hitratio of one or more databases
|
|||
|
check_hitratio() if $action eq 'hitratio';
|
|||
|
|
|||
|
## Check the size of one or more databases
|
|||
|
check_database_size() if $action eq 'database_size';
|
|||
|
|
|||
|
## Check local disk_space - local means it must be run from the same box!
|
|||
|
check_disk_space() if $action eq 'disk_space';
|
|||
|
|
|||
|
## Check the size of relations, or more specifically, tables and indexes
|
|||
|
check_index_size() if $action eq 'index_size';
|
|||
|
check_table_size() if $action eq 'table_size';
|
|||
|
check_relation_size() if $action eq 'relation_size';
|
|||
|
|
|||
|
## Check how long since the last full analyze
|
|||
|
check_last_analyze() if $action eq 'last_analyze';
|
|||
|
|
|||
|
## Check how long since the last full vacuum
|
|||
|
check_last_vacuum() if $action eq 'last_vacuum';
|
|||
|
|
|||
|
## Check how long since the last AUTOanalyze
|
|||
|
check_last_analyze('auto') if $action eq 'last_autoanalyze';
|
|||
|
|
|||
|
## Check how long since the last full AUTOvacuum
|
|||
|
check_last_vacuum('auto') if $action eq 'last_autovacuum';
|
|||
|
|
|||
|
## Check that someone is listening for a specific thing
|
|||
|
check_listener() if $action eq 'listener';
|
|||
|
|
|||
|
## Check number and type of locks
|
|||
|
check_locks() if $action eq 'locks';
|
|||
|
|
|||
|
## Logfile is being written to
|
|||
|
check_logfile() if $action eq 'logfile';
|
|||
|
|
|||
|
## Known query finishes in a good amount of time
|
|||
|
check_query_runtime() if $action eq 'query_runtime';
|
|||
|
|
|||
|
## Check the length of running queries
|
|||
|
check_query_time() if $action eq 'query_time';
|
|||
|
|
|||
|
## Verify that the settings are what we think they should be
|
|||
|
check_settings_checksum() if $action eq 'settings_checksum';
|
|||
|
|
|||
|
## Compare DB time to localtime, alert on number of seconds difference
|
|||
|
check_timesync() if $action eq 'timesync';
|
|||
|
|
|||
|
## Check for transaction ID wraparound in all databases
|
|||
|
check_txn_wraparound() if $action eq 'txn_wraparound';
|
|||
|
|
|||
|
## Compare DB versions. warning = just major.minor, critical = full string
|
|||
|
check_version() if $action eq 'version';
|
|||
|
|
|||
|
## Check the number of WAL files. warning and critical are numbers
|
|||
|
check_wal_files() if $action eq 'wal_files';
|
|||
|
|
|||
|
## Check the number of WAL files ready to archive. warning and critical are numbers
|
|||
|
check_archive_ready() if $action eq 'archive_ready';
|
|||
|
|
|||
|
## Check the replication delay in hot standby setup
|
|||
|
check_hot_standby_delay() if $action eq 'hot_standby_delay';
|
|||
|
|
|||
|
## Check the maximum transaction age of all connections
|
|||
|
check_txn_time() if $action eq 'txn_time';
|
|||
|
|
|||
|
## Check the maximum age of idle in transaction connections
|
|||
|
check_txn_idle() if $action eq 'txn_idle';
|
|||
|
|
|||
|
## Run a custom query
|
|||
|
check_custom_query() if $action eq 'custom_query';
|
|||
|
|
|||
|
## Test of replication
|
|||
|
check_replicate_row() if $action eq 'replicate_row';
|
|||
|
|
|||
|
## Compare database schemas
|
|||
|
check_same_schema() if $action eq 'same_schema';
|
|||
|
|
|||
|
## Check sequence values
|
|||
|
check_sequence() if $action eq 'sequence';
|
|||
|
|
|||
|
## See how close we are to autovacuum_freeze_max_age
|
|||
|
check_autovac_freeze() if $action eq 'autovac_freeze';
|
|||
|
|
|||
|
## See how many pages we have used up compared to max_fsm_pages
|
|||
|
check_fsm_pages() if $action eq 'fsm_pages';
|
|||
|
|
|||
|
## See how many relations we have used up compared to max_fsm_relations
|
|||
|
check_fsm_relations() if $action eq 'fsm_relations';
|
|||
|
|
|||
|
## Spit back info from the pg_stat_database table. Cacti only
|
|||
|
check_dbstats() if $action eq 'dbstats';
|
|||
|
|
|||
|
## Check how long since the last checkpoint
|
|||
|
check_checkpoint() if $action eq 'checkpoint';
|
|||
|
|
|||
|
## Check the Database System Identifier
|
|||
|
check_cluster_id() if $action eq 'cluster_id';
|
|||
|
|
|||
|
## Check for disabled triggers
|
|||
|
check_disabled_triggers() if $action eq 'disabled_triggers';
|
|||
|
|
|||
|
## Check for any prepared transactions
|
|||
|
check_prepared_txns() if $action eq 'prepared_txns';
|
|||
|
|
|||
|
## Make sure Slony is behaving
|
|||
|
check_slony_status() if $action eq 'slony_status';
|
|||
|
|
|||
|
## Verify that the pgbouncer settings are what we think they should be
|
|||
|
check_pgbouncer_checksum() if $action eq 'pgbouncer_checksum';
|
|||
|
|
|||
|
## Check the number of active clients in each pgbouncer pool
|
|||
|
check_pgb_pool('cl_active') if $action eq 'pgb_pool_cl_active';
|
|||
|
|
|||
|
## Check the number of waiting clients in each pgbouncer pool
|
|||
|
check_pgb_pool('cl_waiting') if $action eq 'pgb_pool_cl_waiting';
|
|||
|
|
|||
|
## Check the number of active server connections in each pgbouncer pool
|
|||
|
check_pgb_pool('sv_active') if $action eq 'pgb_pool_sv_active';
|
|||
|
|
|||
|
## Check the number of idle server connections in each pgbouncer pool
|
|||
|
check_pgb_pool('sv_idle') if $action eq 'pgb_pool_sv_idle';
|
|||
|
|
|||
|
## Check the number of used server connections in each pgbouncer pool
|
|||
|
check_pgb_pool('sv_used') if $action eq 'pgb_pool_sv_used';
|
|||
|
|
|||
|
## Check the number of tested server connections in each pgbouncer pool
|
|||
|
check_pgb_pool('sv_tested') if $action eq 'pgb_pool_sv_tested';
|
|||
|
|
|||
|
## Check the number of login server connections in each pgbouncer pool
|
|||
|
check_pgb_pool('sv_login') if $action eq 'pgb_pool_sv_login';
|
|||
|
|
|||
|
## Check the current maximum wait time for client connections in pgbouncer pools
|
|||
|
check_pgb_pool('maxwait') if $action eq 'pgb_pool_maxwait';
|
|||
|
|
|||
|
## Check how many clients are connected to pgbouncer compared to max_client_conn.
|
|||
|
check_pgbouncer_backends() if $action eq 'pgbouncer_backends';
|
|||
|
|
|||
|
##
|
|||
|
## Everything past here does not hit a Postgres database
|
|||
|
##
|
|||
|
$nohost = 1;
|
|||
|
|
|||
|
## Check for new versions of check_postgres.pl
|
|||
|
check_new_version_cp() if $action eq 'new_version_cp';
|
|||
|
|
|||
|
## Check for new versions of Postgres
|
|||
|
check_new_version_pg() if $action eq 'new_version_pg';
|
|||
|
|
|||
|
## Check for new versions of Bucardo
|
|||
|
check_new_version_bc() if $action eq 'new_version_bc';
|
|||
|
|
|||
|
## Check for new versions of boxinfo
|
|||
|
check_new_version_box() if $action eq 'new_version_box';
|
|||
|
|
|||
|
## Check for new versions of tail_n_mail
|
|||
|
check_new_version_tnm() if $action eq 'new_version_tnm';
|
|||
|
|
|||
|
finishup();
|
|||
|
|
|||
|
exit 0;
|
|||
|
|
|||
|
|
|||
|
sub build_symlinks {
|
|||
|
|
|||
|
## Create symlinks to most actions
|
|||
|
$ME =~ /postgres/
|
|||
|
or die msgn('symlink-name');
|
|||
|
|
|||
|
my $force = $action =~ /force/ ? 1 : 0;
|
|||
|
for my $action (sort keys %$action_info) {
|
|||
|
my $space = ' ' x ($longname - length $action);
|
|||
|
my $file = "check_postgres_$action";
|
|||
|
if (-l $file) {
|
|||
|
if (!$force) {
|
|||
|
my $source = readlink $file;
|
|||
|
print msgn('symlink-done', $file, $space, $source);
|
|||
|
next;
|
|||
|
}
|
|||
|
print msg('symlink-unlink', $file, $space);
|
|||
|
unlink $file or die msgn('symlink-fail1', $file, $!);
|
|||
|
}
|
|||
|
elsif (-e $file) {
|
|||
|
print msgn('symlink-exists', $file, $space);
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
if (symlink $0, $file) {
|
|||
|
print msgn('symlink-create', $file);
|
|||
|
}
|
|||
|
else {
|
|||
|
print msgn('symlink-fail2', $file, $ME, $!);
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
exit 0;
|
|||
|
|
|||
|
} ## end of build_symlinks
|
|||
|
|
|||
|
|
|||
|
sub pretty_size {
|
|||
|
|
|||
|
## Transform number of bytes to a SI display similar to Postgres' format
|
|||
|
|
|||
|
my $bytes = shift;
|
|||
|
my $rounded = shift || 0;
|
|||
|
|
|||
|
return "$bytes bytes" if $bytes < 10240;
|
|||
|
|
|||
|
my @unit = qw/kB MB GB TB PB EB ZB YB/;
|
|||
|
|
|||
|
for my $p (1..@unit) {
|
|||
|
if ($bytes <= 1024**$p) {
|
|||
|
$bytes /= (1024**($p-1));
|
|||
|
return $rounded ?
|
|||
|
sprintf ('%d %s', $bytes, $unit[$p-2]) :
|
|||
|
sprintf ('%.2f %s', $bytes, $unit[$p-2]);
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return $bytes;
|
|||
|
|
|||
|
} ## end of pretty_size
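## e.g. pretty_size(123456789) returns '117.74 MB', and pretty_size(123456789, 1)
## returns the rounded form '117 MB'; anything under 10240 is shown as raw bytes.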
|
|||
|
|
|||
|
|
|||
|
sub pretty_time {
|
|||
|
|
|||
|
## Transform number of seconds to a more human-readable format
|
|||
|
## First argument is number of seconds
|
|||
|
## Second optional arg is highest transform: s,m,h,d,w
|
|||
|
## If uppercase, it indicates to "round that one out"
|
|||
|
|
|||
|
my $sec = shift;
|
|||
|
my $tweak = shift || '';
|
|||
|
|
|||
|
## Just seconds (< 2:00)
|
|||
|
if ($sec < 120 or $tweak =~ /s/) {
|
|||
|
return sprintf "$sec %s", $sec==1 ? msg('time-second') : msg('time-seconds');
|
|||
|
}
|
|||
|
|
|||
|
## Minutes and seconds (< 60:00)
|
|||
|
if ($sec < 60*60 or $tweak =~ /m/) {
|
|||
|
my $min = int $sec / 60;
|
|||
|
$sec %= 60;
|
|||
|
my $ret = sprintf "$min %s", $min==1 ? msg('time-minute') : msg('time-minutes');
|
|||
|
$sec and $tweak !~ /S/ and $ret .= sprintf " $sec %s", $sec==1 ? msg('time-second') : msg('time-seconds');
|
|||
|
return $ret;
|
|||
|
}
|
|||
|
|
|||
|
## Hours, minutes, and seconds (< 48:00:00)
|
|||
|
if ($sec < 60*60*24*2 or $tweak =~ /h/) {
|
|||
|
my $hour = int $sec / (60*60);
|
|||
|
$sec -= ($hour*60*60);
|
|||
|
my $min = int $sec / 60;
|
|||
|
$sec -= ($min*60);
|
|||
|
my $ret = sprintf "$hour %s", $hour==1 ? msg('time-hour') : msg('time-hours');
|
|||
|
$min and $tweak !~ /M/ and $ret .= sprintf " $min %s", $min==1 ? msg('time-minute') : msg('time-minutes');
|
|||
|
$sec and $tweak !~ /[SM]/ and $ret .= sprintf " $sec %s", $sec==1 ? msg('time-second') : msg('time-seconds');
|
|||
|
return $ret;
|
|||
|
}
|
|||
|
|
|||
|
## Days, hours, minutes, and seconds (< 28 days)
|
|||
|
if ($sec < 60*60*24*28 or $tweak =~ /d/) {
|
|||
|
my $day = int $sec / (60*60*24);
|
|||
|
$sec -= ($day*60*60*24);
|
|||
|
my $our = int $sec / (60*60);
|
|||
|
$sec -= ($our*60*60);
|
|||
|
my $min = int $sec / 60;
|
|||
|
$sec -= ($min*60);
|
|||
|
my $ret = sprintf "$day %s", $day==1 ? msg('time-day') : msg('time-days');
|
|||
|
$our and $tweak !~ /H/ and $ret .= sprintf " $our %s", $our==1 ? msg('time-hour') : msg('time-hours');
|
|||
|
$min and $tweak !~ /[HM]/ and $ret .= sprintf " $min %s", $min==1 ? msg('time-minute') : msg('time-minutes');
|
|||
|
$sec and $tweak !~ /[HMS]/ and $ret .= sprintf " $sec %s", $sec==1 ? msg('time-second') : msg('time-seconds');
|
|||
|
return $ret;
|
|||
|
}
|
|||
|
|
|||
|
## Weeks, days, hours, minutes, and seconds (28 days or more)
|
|||
|
my $week = int $sec / (60*60*24*7);
|
|||
|
$sec -= ($week*60*60*24*7);
|
|||
|
my $day = int $sec / (60*60*24);
|
|||
|
$sec -= ($day*60*60*24);
|
|||
|
my $our = int $sec / (60*60);
|
|||
|
$sec -= ($our*60*60);
|
|||
|
my $min = int $sec / 60;
|
|||
|
$sec -= ($min*60);
|
|||
|
my $ret = sprintf "$week %s", $week==1 ? msg('time-week') : msg('time-weeks');
|
|||
|
$day and $tweak !~ /D/ and $ret .= sprintf " $day %s", $day==1 ? msg('time-day') : msg('time-days');
|
|||
|
$our and $tweak !~ /[DH]/ and $ret .= sprintf " $our %s", $our==1 ? msg('time-hour') : msg('time-hours');
|
|||
|
$min and $tweak !~ /[DHM]/ and $ret .= sprintf " $min %s", $min==1 ? msg('time-minute') : msg('time-minutes');
|
|||
|
$sec and $tweak !~ /[DHMS]/ and $ret .= sprintf " $sec %s", $sec==1 ? msg('time-second') : msg('time-seconds');
|
|||
|
return $ret;
|
|||
|
|
|||
|
} ## end of pretty_time
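## (Illustrative, with the default English messages)
##   pretty_time(45)        => '45 seconds'
##   pretty_time(7384)      => '2 hours 3 minutes 4 seconds'
##   pretty_time(7384,'S')  => '2 hours 3 minutes'   (uppercase S suppresses the seconds)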
|
|||
|
|
|||
|
|
|||
|
sub run_command {
|
|||
|
|
|||
|
## Run a command string against each of our databases using psql
|
|||
|
## Optional args in a hashref:
|
|||
|
## "failok" - don't report if we failed
|
|||
|
## "fatalregex" - allow this FATAL regex through
|
|||
|
## "target" - use this targetlist instead of generating one
|
|||
|
## "timeout" - change the timeout from the default of $opt{timeout}
|
|||
|
## "regex" - the query must match this or we throw an error
|
|||
|
## "emptyok" - it's okay to not match any rows at all
|
|||
|
## "version" - alternate query strings for different backend versions
|
|||
|
## "dbnumber" - connect with this specific entry from @targetdb
|
|||
|
## "conninfo" - return the connection information string without doing anything
|
|||
|
|
|||
|
my $string = shift || '';
|
|||
|
my $arg = shift || {};
|
|||
|
my $info = { command => $string, db => [], hosts => 0 };
|
|||
|
|
|||
|
## First of all, check whether the server is in standby mode; if so,
## end the check immediately with an OK status.
|
|||
|
|
|||
|
if ($STANDBY) {
|
|||
|
$db->{'totaltime'} = '0.00';
|
|||
|
add_ok msg('mode-standby');
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => 1});
|
|||
|
}
|
|||
|
finishup();
|
|||
|
exit 0;
|
|||
|
}
|
|||
|
|
|||
|
$VERBOSE >= 3 and warn qq{Starting run_command with: $string\n};
|
|||
|
|
|||
|
my (%host,$passfile,$passfh,$tempdir,$tempfile,$tempfh,$errorfile,$errfh);
|
|||
|
my $offset = -1;
|
|||
|
|
|||
|
## The final list of targets has been set inside @targetdb
|
|||
|
|
|||
|
if (! @targetdb) {
|
|||
|
ndie msg('runcommand-nodb');
|
|||
|
}
|
|||
|
|
|||
|
## Create a temp file to store our results
|
|||
|
my @tempdirargs = (CLEANUP => 1);
|
|||
|
if ($opt{tempdir}) {
|
|||
|
push @tempdirargs => 'DIR', $opt{tempdir};
|
|||
|
}
|
|||
|
|
|||
|
$tempdir = tempdir(@tempdirargs);
|
|||
|
($tempfh,$tempfile) = tempfile('check_postgres_psql.XXXXXXX', SUFFIX => '.tmp', DIR => $tempdir);
|
|||
|
|
|||
|
## Create another one to catch any errors
|
|||
|
($errfh,$errorfile) = tempfile('check_postgres_psql_stderr.XXXXXXX', SUFFIX => '.tmp', DIR => $tempdir);
|
|||
|
|
|||
|
## Mild cleanup of the query
|
|||
|
$string =~ s/^\s*(.+?)\s*$/$1/s;
|
|||
|
|
|||
|
## Set a statement_timeout, as a last-ditch safety measure
|
|||
|
my $timeout = $arg->{timeout} || $opt{timeout};
|
|||
|
my $dbtimeout = $timeout * 1000;
|
|||
|
if ($action !~ /^pgb/) {
|
|||
|
$string = "BEGIN;SET statement_timeout=$dbtimeout;COMMIT;$string";
|
|||
|
}
|
|||
|
|
|||
|
## Keep track of which database we are on, to allow dbnumber to work
|
|||
|
my $num = 0;
|
|||
|
|
|||
|
## Loop through and run the command on each target database
|
|||
|
for $db (@targetdb) {
|
|||
|
|
|||
|
## Skip this one if we are using dbnumber and this is not our choice
|
|||
|
$num++;
|
|||
|
if ($arg->{dbnumber} and $arg->{dbnumber} != $num) {
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
## Just to keep things clean:
|
|||
|
truncate $tempfh, 0;
|
|||
|
truncate $errfh, 0;
|
|||
|
|
|||
|
## Store this target in the global target list
|
|||
|
push @{$info->{db}}, $db;
|
|||
|
|
|||
|
my @args = ('-q', '-t');
|
|||
|
if (defined $db->{dbservice} and length $db->{dbservice}) { ## XX Check for simple names
|
|||
|
$db->{pname} = "service=$db->{dbservice}";
|
|||
|
$ENV{PGSERVICE} = $db->{dbservice};
|
|||
|
}
|
|||
|
else {
|
|||
|
$db->{pname} = "port=$db->{port} host=$db->{host} db=$db->{dbname} user=$db->{dbuser}";
|
|||
|
}
|
|||
|
|
|||
|
## If all we want is a connection string, give it and leave now
|
|||
|
if ($arg->{conninfo}) {
|
|||
|
return $db->{pname};
|
|||
|
}
|
|||
|
|
|||
|
defined $db->{dbname} and push @args, '-d', $db->{dbname};
|
|||
|
defined $db->{dbuser} and push @args, '-U', $db->{dbuser};
|
|||
|
defined $db->{port} and push @args => '-p', $db->{port};
|
|||
|
if ($db->{host} ne '<none>') {
|
|||
|
push @args => '-h', $db->{host};
|
|||
|
$host{$db->{host}}++; ## For the overall count
|
|||
|
}
|
|||
|
|
|||
|
if (defined $db->{dbpass} and length $db->{dbpass}) {
|
|||
|
## Make a custom PGPASSFILE. Far better to simply use your own .pgpass of course
|
|||
|
($passfh,$passfile) = tempfile('check_postgres.XXXXXXXX', SUFFIX => '.tmp', DIR => $tempdir);
|
|||
|
$VERBOSE >= 3 and warn msgn('runcommand-pgpass', $passfile);
|
|||
|
$ENV{PGPASSFILE} = $passfile;
|
|||
|
printf $passfh "%s:%s:%s:%s:%s\n",
|
|||
|
$db->{host} eq '<none>' ? '*' : $db->{host}, $db->{port}, $db->{dbname}, $db->{dbuser}, $db->{dbpass};
|
|||
|
close $passfh or ndie msg('file-noclose', $passfile, $!);
|
|||
|
}
|
|||
|
|
|||
|
push @args, '-o', $tempfile;
|
|||
|
push @args => '-x';
|
|||
|
|
|||
|
## If we've got different SQL, use this first run to simply grab the version
|
|||
|
## Then we'll use that info to pick the real query
|
|||
|
if ($arg->{version}) {
|
|||
|
if (!$db->{version}) {
|
|||
|
$arg->{versiononly} = 1;
|
|||
|
$arg->{oldstring} = $string;
|
|||
|
$string = 'SELECT version()';
|
|||
|
}
|
|||
|
else {
|
|||
|
$string = $arg->{oldstring} || $arg->{string};
|
|||
|
for my $row (@{$arg->{version}}) {
|
|||
|
if ($row !~ s/^([<>]?)(\d+\.\d+)\s+//) {
|
|||
|
ndie msg('die-badversion', $row);
|
|||
|
}
|
|||
|
my ($mod,$ver) = ($1||'',$2);
|
|||
|
if ($mod eq '>' and $db->{version} > $ver) {
|
|||
|
$string = $row;
|
|||
|
last;
|
|||
|
}
|
|||
|
if ($mod eq '<' and $db->{version} < $ver) {
|
|||
|
$string = $row;
|
|||
|
last;
|
|||
|
}
|
|||
|
if ($mod eq '' and $db->{version} eq $ver) {
|
|||
|
$string = $row;
|
|||
|
}
|
|||
|
}
|
|||
|
delete $arg->{version};
|
|||
|
$info->{command} = $string;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
local $SIG{ALRM} = sub { die 'Timed out' };
|
|||
|
alarm 0;
|
|||
|
|
|||
|
push @args, '-c', $string;
|
|||
|
|
|||
|
$VERBOSE >= 3 and warn Dumper \@args;
|
|||
|
|
|||
|
my $start = $opt{showtime} ? [gettimeofday()] : 0;
|
|||
|
open my $oldstderr, '>&', \*STDERR or ndie msg('runcommand-nodupe');
|
|||
|
open STDERR, '>', $errorfile or ndie msg('runcommand-noerr');
|
|||
|
eval {
|
|||
|
alarm $timeout;
|
|||
|
$res = system $PSQL => @args;
|
|||
|
};
|
|||
|
my $err = $@;
|
|||
|
alarm 0;
|
|||
|
open STDERR, '>&', $oldstderr or ndie msg('runcommand-noerr');
|
|||
|
close $oldstderr or ndie msg('file-noclose', 'STDERR copy', $!);
|
|||
|
if ($err) {
|
|||
|
if ($err =~ /Timed out/) {
|
|||
|
ndie msg('runcommand-timeout', $timeout);
|
|||
|
}
|
|||
|
else {
|
|||
|
ndie msg('runcommand-err');
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
$db->{totaltime} = sprintf '%.2f', $opt{showtime} ? tv_interval($start) : 0;
|
|||
|
|
|||
|
if ($res) {
|
|||
|
$db->{fail} = $res;
|
|||
|
$VERBOSE >= 3 and !$arg->{failok} and warn msgn('runcommand-nosys', $res);
|
|||
|
seek $errfh, 0, 0;
|
|||
|
{
|
|||
|
local $/;
|
|||
|
$db->{error} = <$errfh> || '';
|
|||
|
$db->{error} =~ s/\s*$//;
|
|||
|
$db->{error} =~ s/^psql: //;
|
|||
|
$ERROR = $db->{error};
|
|||
|
}
|
|||
|
|
|||
|
if ($db->{error} =~ /FATAL/) {
|
|||
|
## If we are just trying to connect, this should be a normal error
|
|||
|
if ($action eq 'connection') {
|
|||
|
$info->{fatal} = 1;
|
|||
|
return $info;
|
|||
|
}
|
|||
|
|
|||
|
if (exists $arg->{fatalregex} and $db->{error} =~ /$arg->{fatalregex}/) {
|
|||
|
$info->{fatalregex} = $db->{error};
|
|||
|
next;
|
|||
|
}
|
|||
|
else {
|
|||
|
ndie "$db->{error}";
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
elsif ($db->{error} =~ /statement timeout/) {
|
|||
|
ndie msg('runcommand-timeout', $timeout);
|
|||
|
}
|
|||
|
|
|||
|
if ($db->{fail} and !$arg->{failok} and !$arg->{noverify}) {
|
|||
|
|
|||
|
## Check if problem is due to backend being too old for this check
|
|||
|
verify_version();
|
|||
|
|
|||
|
if (exists $db->{error}) {
|
|||
|
ndie $db->{error};
|
|||
|
}
|
|||
|
|
|||
|
add_unknown;
|
|||
|
## Remove it from the returned hash
|
|||
|
pop @{$info->{db}};
|
|||
|
}
|
|||
|
}
|
|||
|
else {
|
|||
|
seek $tempfh, 0, 0;
|
|||
|
{
|
|||
|
local $/;
|
|||
|
$db->{slurp} = <$tempfh>;
|
|||
|
}
|
|||
|
$db->{ok} = 1;
|
|||
|
|
|||
|
## Unfortunately, psql outputs "(No rows)" even with -t and -x
|
|||
|
$db->{slurp} = '' if ! defined $db->{slurp} or index($db->{slurp},'(')==0;
|
|||
|
|
|||
|
## Allow an empty query (no matching rows) if requested
|
|||
|
if ($arg->{emptyok} and $db->{slurp} =~ /^\s*$/o) {
|
|||
|
$arg->{emptyok2} = 1;
|
|||
|
}
|
|||
|
## If we just want a version, grab it and redo
|
|||
|
if ($arg->{versiononly}) {
|
|||
|
if ($db->{error}) {
|
|||
|
ndie $db->{error};
|
|||
|
}
|
|||
|
if ($db->{slurp} !~ /(\d+\.\d+)/) {
|
|||
|
ndie msg('die-badversion', $db->{slurp});
|
|||
|
}
|
|||
|
$db->{version} = $1;
|
|||
|
$db->{ok} = 0;
|
|||
|
delete $arg->{versiononly};
|
|||
|
## Remove this from the returned hash
|
|||
|
pop @{$info->{db}};
|
|||
|
redo;
|
|||
|
}
|
|||
|
|
|||
|
## If we were provided with a regex, check and bail if it fails
|
|||
|
if ($arg->{regex} and ! $arg->{emptyok2}) {
|
|||
|
if ($db->{slurp} !~ $arg->{regex}) {
|
|||
|
## Check if problem is due to backend being too old for this check
|
|||
|
|
|||
|
verify_version();
|
|||
|
|
|||
|
add_unknown msg('invalid-query', $db->{slurp});
|
|||
|
|
|||
|
finishup();
|
|||
|
exit 0;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Transform psql output into an arrayref of hashes
|
|||
|
my @stuff;
|
|||
|
my $lnum = 0;
|
|||
|
my $lastval;
|
|||
|
for my $line (split /\n/ => $db->{slurp}) {
|
|||
|
|
|||
|
if (index($line,'-')==0) {
|
|||
|
$lnum++;
|
|||
|
next;
|
|||
|
}
|
|||
|
if ($line =~ /^([\?\w]+)\s+\| (.*)/) {
|
|||
|
$stuff[$lnum]{$1} = $2;
|
|||
|
$lastval = $1;
|
|||
|
}
|
|||
|
elsif ($line =~ /^QUERY PLAN\s+\| (.*)/) {
|
|||
|
$stuff[$lnum]{queryplan} = $1;
|
|||
|
$lastval = 'queryplan';
|
|||
|
}
|
|||
|
elsif ($line =~ /^\s+: (.*)/) {
|
|||
|
$stuff[$lnum]{$lastval} .= "\n$1";
|
|||
|
}
|
|||
|
elsif ($line =~ /^\s+\| (.+)/) {
|
|||
|
$stuff[$lnum]{$lastval} .= "\n$1";
|
|||
|
}
|
|||
|
## No content: can happen in the source of functions, for example
|
|||
|
elsif ($line =~ /^\s+\|\s+$/) {
|
|||
|
$stuff[$lnum]{$lastval} .= "\n";
|
|||
|
}
|
|||
|
else {
|
|||
|
my $msg = msg('no-parse-psql');
|
|||
|
warn "$msg\n";
|
|||
|
$msg = msg('bug-report');
|
|||
|
warn "$msg\n";
|
|||
|
my $cline = (caller)[2];
|
|||
|
my $args = join ' ' => @args;
|
|||
|
warn "Version: $VERSION\n";
|
|||
|
warn "Action: $action\n";
|
|||
|
warn "Calling line: $cline\n";
|
|||
|
warn "Output: $line\n";
|
|||
|
$args =~ s/ -c (.+)/-c "$1"/s;
|
|||
|
warn "Command: $PSQL $args\n";
|
|||
|
## Last thing is to see if we can grab the PG version
|
|||
|
if (! $opt{stop_looping}) {
|
|||
|
## Just in case...
|
|||
|
$opt{stop_looping} = 1;
|
|||
|
my $linfo = run_command('SELECT version() AS version');
|
|||
|
(my $v = $linfo->{db}[0]{slurp}[0]{version}) =~ s/(\w+ \S+).+/$1/;
|
|||
|
warn "Postgres version: $v\n";
|
|||
|
}
|
|||
|
exit 1;
|
|||
|
}
|
|||
|
}
|
|||
|
$db->{slurp} = \@stuff;
|
|||
|
} ## end valid system call
|
|||
|
|
|||
|
} ## end each database
|
|||
|
|
|||
|
close $errfh or ndie msg('file-noclose', $errorfile, $!);
|
|||
|
close $tempfh or ndie msg('file-noclose', $tempfile, $!);
|
|||
|
|
|||
|
eval { File::Temp::cleanup(); };
|
|||
|
|
|||
|
$info->{hosts} = keys %host;
|
|||
|
|
|||
|
$VERBOSE >= 3 and warn Dumper $info;
|
|||
|
|
|||
|
if ($DEBUGOUTPUT) {
|
|||
|
if (defined $info->{db} and defined $info->{db}[0]{slurp}) {
|
|||
|
$DEBUG_INFO = $info->{db}[0]{slurp};
|
|||
|
$DEBUG_INFO =~ s/\n/\\n/g;
|
|||
|
$DEBUG_INFO =~ s/\|/<SEP>/g;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return $info;
|
|||
|
|
|||
|
} ## end of run_command
|
|||
|
|
|||
|
|
|||
|
sub setup_target_databases {
|
|||
|
|
|||
|
## Build a list of all databases to connect to.
|
|||
|
## Returns a list of all such databases with connection information:
|
|||
|
## -- dbuser, --dbpass, --dbservice, --port, --dbname, --host
|
|||
|
##
|
|||
|
## Items are determined by host, port, and db arguments
|
|||
|
## Multi-args are grouped together: host, port, dbuser, dbpass
|
|||
|
## Groups are kept together for first pass
|
|||
|
## The final arg in a group is passed on
|
|||
|
##
|
|||
|
## Examples:
|
|||
|
## --host=a,b --port=5433 --db=c
|
|||
|
|
|||
|
## Connects twice to port 5433, using database c, to hosts a and b
|
|||
|
## a-5433-c b-5433-c
|
|||
|
##
|
|||
|
## --host=a,b --port=5433 --db=c,d
|
|||
|
## Connects four times: a-5433-c a-5433-d b-5433-c b-5433-d
|
|||
|
##
|
|||
|
## --host=a,b --host=foo --port=1234 --port=5433 --db=e,f
|
|||
|
## Connects six times: a-1234-e a-1234-f b-1234-e b-1234-f foo-5433-e foo-5433-f
|
|||
|
##
|
|||
|
## --host=a,b --host=x --port=5432,5433 --dbuser=alice --dbuser=bob --db=baz
|
|||
|
## Connects three times: a-5432-alice-baz b-5433-alice-baz x-5433-bob-baz
|
|||
|
|
|||
|
## Returns a list of targets as a hashref
|
|||
|
|
|||
|
my $arg = shift || {};
|
|||
|
|
|||
|
## The final list of targets:
|
|||
|
my @target;
|
|||
|
|
|||
|
## Default connection options
|
|||
|
my $conn =
|
|||
|
{
|
|||
|
host => [$ENV{PGHOST} || '<none>'],
|
|||
|
port => [$ENV{PGPORT} || $opt{defaultport}],
|
|||
|
dbname => [$ENV{PGDATABASE} || $opt{defaultdb}],
|
|||
|
dbuser => [$ENV{PGUSER} || $opt{defaultuser}],
|
|||
|
dbpass => [$ENV{PGPASSWORD} || ''],
|
|||
|
dbservice => [''],
|
|||
|
};
|
|||
|
|
|||
|
## Don't set any default values if a service is being used
|
|||
|
if (defined $opt{dbservice} and defined $opt{dbservice}->[0] and length $opt{dbservice}->[0]) {
|
|||
|
$conn->{dbname} = [];
|
|||
|
$conn->{port} = [];
|
|||
|
$conn->{dbuser} = [];
|
|||
|
}
|
|||
|
|
|||
|
## If we were passed in a target, use that and move on
|
|||
|
if (exists $arg->{target}) {
|
|||
|
## Make a copy, in case we are passed in a ref
|
|||
|
my $newtarget;
|
|||
|
for my $key (keys %$conn) {
|
|||
|
$newtarget->{$key} = exists $arg->{target}{$key} ? $arg->{target}{$key} : $conn->{$key};
|
|||
|
}
|
|||
|
return [$newtarget];
|
|||
|
}
|
|||
|
|
|||
|
## Global count of total places we are connecting to
|
|||
|
## We don't mess with this if using {target} above
|
|||
|
$opt{numdbs} = 0;
|
|||
|
|
|||
|
## The current group number we are looking at
|
|||
|
my $group_num = 0;
|
|||
|
|
|||
|
GROUP: {
|
|||
|
|
|||
|
## This level controls a "group" of targets
|
|||
|
|
|||
|
## Start bubbling all our targets into other stuff
|
|||
|
my %group;
|
|||
|
my $found_new_var = 0;
|
|||
|
|
|||
|
for my $v (keys %$conn) { ## For each connection var such as port, host...
|
|||
|
my $vname = $v;
|
|||
|
|
|||
|
## Check if something exists at the current slot number for this var
|
|||
|
if (defined $opt{$v}->[$group_num]) {
|
|||
|
|
|||
|
my $new = $opt{$v}->[$group_num];
|
|||
|
|
|||
|
## Strip out whitespace unless this is a service or host
|
|||
|
$new =~ s/\s+//g unless $vname eq 'dbservice' or $vname eq 'host';
|
|||
|
|
|||
|
## Set this as the new default for this connection var moving forward
|
|||
|
$conn->{$vname} = [split /,/ => $new];
|
|||
|
|
|||
|
## Make a note that we found something new this round
|
|||
|
$found_new_var = 1;
|
|||
|
}
|
|||
|
|
|||
|
$group{$vname} = $conn->{$vname};
|
|||
|
}
|
|||
|
|
|||
|
## If we found nothing new, we must be done building our groups
|
|||
|
last GROUP if ! $found_new_var and @target;
|
|||
|
|
|||
|
$group_num++;
|
|||
|
|
|||
|
## Now break the newly created group into individual targets
|
|||
|
my $tbin = 0;
|
|||
|
TARGET: {
|
|||
|
my $foundtarget = 0;
|
|||
|
my %temptarget;
|
|||
|
for my $g (keys %group) {
|
|||
|
if (defined $group{$g}->[$tbin]) {
|
|||
|
$conn->{$g} = [$group{$g}->[$tbin]];
|
|||
|
$foundtarget = 1;
|
|||
|
}
|
|||
|
$temptarget{$g} = $conn->{$g}[0];
|
|||
|
}
|
|||
|
|
|||
|
## Leave if nothing new
|
|||
|
last TARGET if ! $foundtarget;
|
|||
|
|
|||
|
## Add to our master list
|
|||
|
push @target => \%temptarget;
|
|||
|
|
|||
|
$tbin++;
|
|||
|
|
|||
|
redo TARGET;
|
|||
|
|
|||
|
} ## end TARGET
|
|||
|
|
|||
|
last GROUP if ! $found_new_var;
|
|||
|
|
|||
|
redo GROUP;
|
|||
|
|
|||
|
} ## end GROUP
|
|||
|
|
|||
|
return @target;
|
|||
|
|
|||
|
} ## end of setup_target_databases
|
|||
|
|
|||
|
|
|||
|
sub verify_version {
|
|||
|
|
|||
|
## Check if the backend can handle the current action
|
|||
|
my $limit = $testaction{lc $action} || '';
|
|||
|
|
|||
|
my $versiononly = shift || 0;
|
|||
|
|
|||
|
return if ! $limit and ! $versiononly;
|
|||
|
|
|||
|
## We almost always need the version, so just grab it for any limitation
|
|||
|
$SQL = q{SELECT setting FROM pg_settings WHERE name = 'server_version'};
|
|||
|
my $oldslurp = $db->{slurp} || '';
|
|||
|
my $info = run_command($SQL, {noverify => 1});
|
|||
|
if (defined $info->{db}[0]
|
|||
|
and exists $info->{db}[0]{error}
|
|||
|
and defined $info->{db}[0]{error}
|
|||
|
) {
|
|||
|
ndie $info->{db}[0]{error};
|
|||
|
}
|
|||
|
|
|||
|
if (!defined $info->{db}[0] or $info->{db}[0]{slurp}[0]{setting} !~ /((\d+)\.(\d+))/) {
|
|||
|
ndie msg('die-badversion', $SQL);
|
|||
|
}
|
|||
|
my ($sver,$smaj,$smin) = ($1,$2,$3);
|
|||
|
|
|||
|
if ($versiononly) {
|
|||
|
return $sver;
|
|||
|
}
|
|||
|
|
|||
|
if ($limit =~ /VERSION: ((\d+)\.(\d+))/) {
|
|||
|
my ($rver,$rmaj,$rmin) = ($1,$2,$3);
|
|||
|
if ($smaj < $rmaj or ($smaj==$rmaj and $smin < $rmin)) {
|
|||
|
ndie msg('die-action-version', $action, $rver, $sver);
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
while ($limit =~ /\bON: (\w+)(?:\(([<>=])(\d+\.\d+)\))?/g) {
|
|||
|
my ($setting,$op,$ver) = ($1,$2||'',$3||0);
|
|||
|
if ($ver) {
|
|||
|
next if $op eq '<' and $sver >= $ver;
|
|||
|
next if $op eq '>' and $sver <= $ver;
|
|||
|
next if $op eq '=' and $sver != $ver;
|
|||
|
}
|
|||
|
|
|||
|
$SQL = qq{SELECT setting FROM pg_settings WHERE name = '$setting'};
|
|||
|
my $info2 = run_command($SQL);
|
|||
|
if (!defined $info2->{db}[0]) {
|
|||
|
ndie msg('die-nosetting', $setting);
|
|||
|
}
|
|||
|
my $val = $info2->{db}[0]{slurp}[0]{setting};
|
|||
|
if ($val !~ /^\s*on\b/) {
|
|||
|
ndie msg('die-noset', $action, $setting);
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
$db->{slurp} = $oldslurp;
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of verify_version
|
|||
|
|
|||
|
|
|||
|
sub size_in_bytes { ## no critic (RequireArgUnpacking)

    ## Given a number and a unit, return the number of bytes.
    ## Defaults to bytes

    my ($val,$unit) = ($_[0],lc substr($_[1]||'s',0,1));
    return $val * ($unit eq 'b' ? 1 : $unit eq 'k' ? 1024 : $unit eq 'm' ? 1024**2 :
                   $unit eq 'g' ? 1024**3 : $unit eq 't' ? 1024**4 :
                   $unit eq 'p' ? 1024**5 : $unit eq 'e' ? 1024**6 :
                   $unit eq 'z' ? 1024**7 : 1);

} ## end of size_in_bytes
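
## Illustrative examples of the conversion above (only the first letter of the
## unit matters, case-insensitively):
##   size_in_bytes(512, 'bytes')  => 512
##   size_in_bytes(8, 'kb')       => 8192         # 8 * 1024
##   size_in_bytes(2, 'GB')       => 2147483648   # 2 * 1024**3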
|
|||
|
|
|||
|
|
|||
|
sub size_in_seconds {

    my ($string,$type) = @_;

    return '' if ! length $string;
    if ($string !~ $timere) {
        ndie msg('die-badtime', $type, substr($type,0,1));
    }
    my ($val,$unit) = ($1,lc substr($2||'s',0,1));
    my $tempval = sprintf '%.9f', $val * (
        $unit eq 's' ? 1 :
        $unit eq 'm' ? 60 :
        $unit eq 'h' ? 3600 :
        $unit eq 'd' ? 86400 :
        $unit eq 'w' ? 604800 :
        $unit eq 'y' ? 31536000 :
        ndie msg('die-badtime', $type, substr($type,0,1))
    );
    $tempval =~ s/0+$//;
    $tempval = int $tempval if $tempval =~ /\.$/;
    return $tempval;

} ## end of size_in_seconds
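
## Illustrative examples, assuming $timere accepts a number with an optional unit word:
##   size_in_seconds('45', 'warning')        => 45       # bare numbers are seconds
##   size_in_seconds('2 hours', 'warning')   => 7200     # 2 * 3600
##   size_in_seconds('1.5 days', 'critical') => 129600   # 1.5 * 86400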
|
|||
|
|
|||
|
|
|||
|
sub skip_item {

    ## Determine if something should be skipped due to inclusion/exclusion options
    ## Exclusion checked first: inclusion can pull it back in.
    my $name = shift;
    my $schema = shift || '';

    my $stat = 0;
    ## Is this excluded?
    if (defined $opt{exclude}) {
        $stat = 1;
        for (@{$opt{exclude}}) {
            for my $ex (split /\s*,\s*/o => $_) {
                if ($ex =~ s/\.$//) {
                    if ($ex =~ s/^~//) {
                        ($stat += 2 and last) if $schema =~ /$ex/;
                    }
                    else {
                        ($stat += 2 and last) if $schema eq $ex;
                    }
                }
                elsif ($ex =~ s/^~//) {
                    ($stat += 2 and last) if $name =~ /$ex/;
                }
                else {
                    ($stat += 2 and last) if $name eq $ex;
                }
            }
        }
    }
    if (defined $opt{include}) {
        $stat += 4;
        for (@{$opt{include}}) {
            for my $in (split /\s*,\s*/o => $_) {
                if ($in =~ s/\.$//) {
                    if ($in =~ s/^~//) {
                        ($stat += 8 and last) if $schema =~ /$in/;
                    }
                    else {
                        ($stat += 8 and last) if $schema eq $in;
                    }
                }
                elsif ($in =~ s/^~//) {
                    ($stat += 8 and last) if $name =~ /$in/;
                }
                else {
                    ($stat += 8 and last) if $name eq $in;
                }
            }
        }
    }

    ## Easiest to state the cases when we DO skip:
    return 1 if
           3 == $stat     ## exclude matched, no inclusion checking
        or 4 == $stat     ## include check only, no match
        or 7 == $stat;    ## exclude match, no inclusion match

    return 0;

} ## end of skip_item
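
## Illustrative note on the $stat bitmask above: 1 = --exclude was given,
## 2 = an exclude rule matched, 4 = --include was given, 8 = an include rule matched.
## With --exclude=~pgbench alone, skip_item('pgbench_accounts') yields $stat == 3 and
## the item is skipped; adding --include=~accounts raises it to 15 and the item is kept.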
|
|||
|
|
|||
|
|
|||
|
sub validate_range {
|
|||
|
|
|||
|
    ## Validate that warning and critical are set correctly.
|
|||
|
## Returns new values of both
|
|||
|
|
|||
|
my $arg = shift;
|
|||
|
defined $arg and ref $arg eq 'HASH' or ndie qq{validate_range must be called with a hashref\n};
|
|||
|
|
|||
|
return ('','') if $MRTG and !$arg->{forcemrtg};
|
|||
|
|
|||
|
my $type = $arg->{type} or ndie qq{validate_range must be provided a 'type'\n};
|
|||
|
|
|||
|
## The 'default default' is an empty string, which should fail all mandatory tests
|
|||
|
## We only set the 'arg' default if neither option is provided.
|
|||
|
my $warning = exists $opt{warning} ? $opt{warning} :
|
|||
|
exists $opt{critical} ? '' : $arg->{default_warning} || '';
|
|||
|
my $critical = exists $opt{critical} ? $opt{critical} :
|
|||
|
exists $opt{warning} ? '' : $arg->{default_critical} || '';
|
|||
|
|
|||
|
if ('string' eq $type) {
|
|||
|
## Don't use this unless you have to
|
|||
|
}
|
|||
|
elsif ('seconds' eq $type) {
|
|||
|
if (length $warning) {
|
|||
|
if ($warning !~ $timesecre) {
|
|||
|
ndie msg('range-seconds', 'warning');
|
|||
|
}
|
|||
|
$warning = $1;
|
|||
|
}
|
|||
|
if (length $critical) {
|
|||
|
if ($critical !~ $timesecre) {
|
|||
|
ndie msg('range-seconds', 'critical')
|
|||
|
}
|
|||
|
$critical = $1;
|
|||
|
if (length $warning and $warning > $critical) {
|
|||
|
ndie msg('range-warnbigtime', $warning, $critical);
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ('time' eq $type) {
|
|||
|
$critical = size_in_seconds($critical, 'critical');
|
|||
|
$warning = size_in_seconds($warning, 'warning');
|
|||
|
if (! length $critical and ! length $warning) {
|
|||
|
ndie msg('range-notime');
|
|||
|
}
|
|||
|
if (length $warning and length $critical and $warning > $critical) {
|
|||
|
ndie msg('range-warnbigtime', $warning, $critical);
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ('version' eq $type) {
|
|||
|
my $msg = msg('range-version');
|
|||
|
if (length $warning and $warning !~ /^\d+\.\d+\.?[\d\w]*$/) {
|
|||
|
ndie msg('range-badversion', 'warning', $msg);
|
|||
|
}
|
|||
|
if (length $critical and $critical !~ /^\d+\.\d+\.?[\d\w]*$/) {
|
|||
|
ndie msg('range-badversion', 'critical', $msg);
|
|||
|
}
|
|||
|
if (! length $critical and ! length $warning) {
|
|||
|
ndie msg('range-noopt-orboth');
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ('size' eq $type) {
|
|||
|
if (length $critical) {
|
|||
|
if ($critical !~ $sizere) {
|
|||
|
ndie msg('range-badsize', 'critical');
|
|||
|
}
|
|||
|
$critical = size_in_bytes($1,$2);
|
|||
|
}
|
|||
|
if (length $warning) {
|
|||
|
if ($warning !~ $sizere) {
|
|||
|
ndie msg('range-badsize', 'warning');
|
|||
|
}
|
|||
|
$warning = size_in_bytes($1,$2);
|
|||
|
if (length $critical and $warning > $critical) {
|
|||
|
ndie msg('range-warnbigsize', $warning, $critical);
|
|||
|
}
|
|||
|
}
|
|||
|
elsif (!length $critical) {
|
|||
|
ndie msg('range-nosize');
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ($type =~ /integer/) {
|
|||
|
$warning =~ s/_//g;
|
|||
|
if (length $warning and $warning !~ /^[-+]?\d+$/) {
|
|||
|
ndie $type =~ /positive/ ? msg('range-int-pos', 'warning') : msg('range-int', 'warning');
|
|||
|
}
|
|||
|
elsif (length $warning and $type =~ /positive/ and $warning <= 0) {
|
|||
|
ndie msg('range-int-pos', 'warning');
|
|||
|
}
|
|||
|
|
|||
|
$critical =~ s/_//g;
|
|||
|
if (length $critical and $critical !~ /^[-+]?\d+$/) {
|
|||
|
ndie $type =~ /positive/ ? msg('range-int-pos', 'critical') : msg('range-int', 'critical');
|
|||
|
}
|
|||
|
elsif (length $critical and $type =~ /positive/ and $critical <= 0) {
|
|||
|
ndie msg('range-int-pos', 'critical');
|
|||
|
}
|
|||
|
|
|||
|
if (length $warning
|
|||
|
and length $critical
|
|||
|
and (
|
|||
|
($opt{reverse} and $warning < $critical)
|
|||
|
or
|
|||
|
(!$opt{reverse} and $warning > $critical)
|
|||
|
)
|
|||
|
) {
|
|||
|
ndie msg('range-warnbig');
|
|||
|
}
|
|||
|
if ($type !~ /string/) {
|
|||
|
$warning = int $warning if length $warning;
|
|||
|
$critical = int $critical if length $critical;
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ('restringex' eq $type) {
|
|||
|
if (! length $critical and ! length $warning) {
|
|||
|
ndie msg('range-noopt-one');
|
|||
|
}
|
|||
|
if (length $critical and length $warning) {
|
|||
|
ndie msg('range-noopt-only');
|
|||
|
}
|
|||
|
my $string = length $critical ? $critical : $warning;
|
|||
|
my $regex = ($string =~ s/^~//) ? '~' : '=';
|
|||
|
$string =~ /^\w+$/ or ndie msg('invalid-option');
|
|||
|
}
|
|||
|
elsif ('percent' eq $type) {
|
|||
|
if (length $critical) {
|
|||
|
if ($critical !~ /^(\d+)\%$/) {
|
|||
|
ndie msg('range-badpercent', 'critical');
|
|||
|
}
|
|||
|
$critical = $1;
|
|||
|
}
|
|||
|
if (length $warning) {
|
|||
|
if ($warning !~ /^(\d+)\%$/) {
|
|||
|
ndie msg('range-badpercent', 'warning');
|
|||
|
}
|
|||
|
$warning = $1;
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ('size or percent' eq $type) {
|
|||
|
if (length $critical) {
|
|||
|
if ($critical =~ $sizere) {
|
|||
|
$critical = size_in_bytes($1,$2);
|
|||
|
}
|
|||
|
elsif ($critical !~ /^\d+\%$/) {
|
|||
|
ndie msg('range-badpercsize', 'critical');
|
|||
|
}
|
|||
|
}
|
|||
|
if (length $warning) {
|
|||
|
if ($warning =~ $sizere) {
|
|||
|
$warning = size_in_bytes($1,$2);
|
|||
|
}
|
|||
|
elsif ($warning !~ /^\d+\%$/) {
|
|||
|
ndie msg('range-badpercsize', 'warning');
|
|||
|
}
|
|||
|
}
|
|||
|
elsif (! length $critical) {
|
|||
|
ndie msg('range-noopt-size');
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ('checksum' eq $type) {
|
|||
|
if (length $critical and $critical !~ $checksumre and $critical ne '0') {
|
|||
|
ndie msg('range-badcs', 'critical');
|
|||
|
}
|
|||
|
if (length $warning and $warning !~ $checksumre) {
|
|||
|
ndie msg('range-badcs', 'warning');
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ('multival' eq $type) { ## Simple number, or foo=#;bar=#
|
|||
|
## Note: only used for check_locks
|
|||
|
my %err;
|
|||
|
while ($critical =~ /(\w+)\s*=\s*(\d+)/gi) {
|
|||
|
my ($name,$val) = (lc $1,$2);
|
|||
|
$name =~ s/lock$//;
|
|||
|
$err{$name} = $val;
|
|||
|
}
|
|||
|
if (keys %err) {
|
|||
|
$critical = \%err;
|
|||
|
}
|
|||
|
elsif (length $critical and $critical =~ /^(\d+)$/) {
|
|||
|
$err{total} = $1;
|
|||
|
$critical = \%err;
|
|||
|
}
|
|||
|
elsif (length $critical) {
|
|||
|
ndie msg('range-badlock', 'critical');
|
|||
|
}
|
|||
|
my %warn;
|
|||
|
while ($warning =~ /(\w+)\s*=\s*(\d+)/gi) {
|
|||
|
my ($name,$val) = (lc $1,$2);
|
|||
|
$name =~ s/lock$//;
|
|||
|
$warn{$name} = $val;
|
|||
|
}
|
|||
|
if (keys %warn) {
|
|||
|
$warning = \%warn;
|
|||
|
}
|
|||
|
elsif (length $warning and $warning =~ /^(\d+)$/) {
|
|||
|
$warn{total} = $1;
|
|||
|
$warning = \%warn;
|
|||
|
}
|
|||
|
elsif (length $warning) {
|
|||
|
ndie msg('range-badlock', 'warning');
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ('cacti' eq $type) { ## Takes no args, just dumps data
|
|||
|
if (length $warning or length $critical) {
|
|||
|
ndie msg('range-cactionly');
|
|||
|
}
|
|||
|
}
|
|||
|
else {
|
|||
|
ndie msg('range-badtype', $type);
|
|||
|
}
|
|||
|
|
|||
|
if ($arg->{both}) {
|
|||
|
if (! length $warning or ! length $critical) {
|
|||
|
ndie msg('range-noopt-both');
|
|||
|
}
|
|||
|
}
|
|||
|
if ($arg->{leastone}) {
|
|||
|
if (! length $warning and ! length $critical) {
|
|||
|
ndie msg('range-noopt-one');
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ($arg->{onlyone}) {
|
|||
|
if (length $warning and length $critical) {
|
|||
|
ndie msg('range-noopt-only');
|
|||
|
}
|
|||
|
if (! length $warning and ! length $critical) {
|
|||
|
ndie msg('range-noopt-one');
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return ($warning,$critical);
|
|||
|
|
|||
|
} ## end of validate_range
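
## Illustrative note on the default handling above: the per-action defaults only
## apply when *neither* option is given. With default_warning => '1 GB' and
## default_critical => '5 GB' (type 'size'), calling with no options yields
## (1073741824, 5368709120), while passing only --critical=2GB yields
## ('', 2147483648): the default warning is deliberately dropped.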
|
|||
|
|
|||
|
|
|||
|
sub validate_size_or_percent_with_oper {
|
|||
|
|
|||
|
my $arg = shift || {};
|
|||
|
ndie qq{validate_range must be called with a hashref\n}
|
|||
|
unless ref $arg eq 'HASH';
|
|||
|
|
|||
|
my $warning = exists $opt{warning} ? $opt{warning} :
|
|||
|
exists $opt{critical} ? '' : $arg->{default_warning} || '';
|
|||
|
my $critical = exists $opt{critical} ? $opt{critical} :
|
|||
|
exists $opt{warning} ? '' : $arg->{default_critical} || '';
|
|||
|
|
|||
|
ndie msg('range-noopt-size') unless length $critical || length $warning;
|
|||
|
my @subs;
|
|||
|
for my $val ($warning, $critical) {
|
|||
|
if ($val =~ /^(.+?)\s([&|]{2}|and|or)\s(.+)$/i) {
|
|||
|
my ($l, $op, $r) = ($1, $2, $3);
|
|||
|
local $opt{warning} = $l;
|
|||
|
local $opt{critical} = 0;
|
|||
|
($l) = validate_range({ type => 'size or percent' });
|
|||
|
$opt{warning} = $r;
|
|||
|
($r) = validate_range({ type => 'size or percent' });
|
|||
|
if ($l =~ s/%$//) {
|
|||
|
($l, $r) = ($r, $l);
|
|||
|
}
|
|||
|
else {
|
|||
|
$r =~ s/%$//;
|
|||
|
}
|
|||
|
push @subs, $op eq '&&' || lc $op eq 'and' ? sub {
|
|||
|
$_[0] >= $l && $_[1] >= $r;
|
|||
|
} : sub {
|
|||
|
$_[0] >= $l || $_[1] >= $r;
|
|||
|
};
|
|||
|
}
|
|||
|
else {
|
|||
|
local $opt{warning} = $val;
|
|||
|
local $opt{critical} = 0;
|
|||
|
my ($v) = validate_range({ type => 'size or percent' });
|
|||
|
push @subs, !length $v ? sub { 0 }
|
|||
|
: $v =~ s/%$// ? sub { $_[1] >= $v }
|
|||
|
: sub { $_[0] >= $v };
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return @subs;
|
|||
|
|
|||
|
} ## end of validate_size_or_percent_with_oper
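
## Illustrative note: the routine above turns thresholds such as
## --critical='5 GB and 10%' into closures that are later called as
## $check->($bytes, $percent); with that example value the check only fires when
## the size is at least 5 GB *and* the percentage is at least 10 (use 'or'/'||'
## for either-one semantics).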
|
|||
|
|
|||
|
|
|||
|
sub validate_integer_for_time {
|
|||
|
|
|||
|
my $arg = shift || {};
|
|||
|
ndie qq{validate_integer_for_time must be called with a hashref\n}
|
|||
|
unless ref $arg eq 'HASH';
|
|||
|
|
|||
|
my $warning = exists $opt{warning} ? $opt{warning} :
|
|||
|
exists $opt{critical} ? '' : $arg->{default_warning} || '';
|
|||
|
my $critical = exists $opt{critical} ? $opt{critical} :
|
|||
|
exists $opt{warning} ? '' : $arg->{default_critical} || '';
|
|||
|
ndie msg('range-nointfortime', 'critical') unless length $critical or length $warning;
|
|||
|
|
|||
|
my @ret;
|
|||
|
for my $spec ([ warning => $warning], [critical => $critical]) {
|
|||
|
my ($level, $val) = @{ $spec };
|
|||
|
if (length $val) {
|
|||
|
if ($val =~ /^(.+?)\sfor\s(.+)$/i) {
|
|||
|
my ($int, $time) = ($1, $2);
|
|||
|
|
|||
|
# Integer first, time second.
|
|||
|
($int, $time) = ($time, $int)
|
|||
|
if $int =~ /[a-zA-Z]$/ || $time =~ /^[-+]\d+$/;
|
|||
|
|
|||
|
# Determine the values.
|
|||
|
$time = size_in_seconds($time, $level);
|
|||
|
ndie msg('range-int', $level) if $time !~ /^[-+]?\d+$/;
|
|||
|
push @ret, int $int, $time;
|
|||
|
}
|
|||
|
else {
|
|||
|
# Disambiguate int from time int by sign.
|
|||
|
if ($val =~ /^[-+]\d+$/) {
|
|||
|
ndie msg('range-int', $level) if $val !~ /^[-+]?\d+$/;
|
|||
|
push @ret, int $val, '';
|
|||
|
}
|
|||
|
else {
|
|||
|
# Assume time for backwards compatibility.
|
|||
|
push @ret, '', size_in_seconds($val, $level);
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
else {
|
|||
|
push @ret, '', '';
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return @ret;
|
|||
|
|
|||
|
} ## end of validate_integer_for_time
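
## Illustrative note: this parser supports the "N for T" form used by some
## time-based actions. For example, the critical pair for --critical='5 for 2m'
## comes back as (5, 120), a plain --critical='+3' as (3, ''), and a bare
## --critical='10 minutes' as ('', 600) for backwards compatibility.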
|
|||
|
|
|||
|
|
|||
|
sub perfname {

    ## Return a safe label name for Nagios performance data
    my $name = shift;

    my $escape = 0;

    $name =~ s/'/''/g and $escape++;

    if ($escape or index($name, ' ') >= 0) {
        $name = qq{'$name'};
    }

    return $name;

} ## end of perfname;
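
## Illustrative examples of the quoting rules above:
##   perfname('commits')      => commits
##   perfname('my database')  => 'my database'    # space forces quoting
##   perfname("it's")         => 'it''s'          # embedded quote doubled, then quoted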
|
|||
|
|
|||
|
|
|||
|
sub open_controldata {
    ## Requires $ENV{PGDATA} or --datadir

    ## Find the data directory, make sure it exists
    my $dir = $opt{datadir} || $ENV{PGDATA};

    if (!defined $dir or ! length $dir) {
        ndie msg('checkpoint-nodir');
    }

    if (! -d $dir) {
        ndie msg('checkpoint-baddir', $dir);
    }

    ## Run pg_controldata
    my $pgc
        = $ENV{PGCONTROLDATA} ? $ENV{PGCONTROLDATA}
        : $ENV{PGBINDIR}      ? "$ENV{PGBINDIR}/pg_controldata"
        :                       'pg_controldata';
    $COM = qq{$pgc "$dir"};
    eval {
        $res = qx{$COM 2>&1};
    };
    if ($@) {
        ndie msg('checkpoint-nosys', $@);
    }

    ## If the path is echoed back, we most likely have an invalid data dir
    if ($res =~ /$dir/) {
        ndie msg('checkpoint-baddir2', $dir);
    }

    if ($res =~ /WARNING: Calculated CRC checksum/) {
        ndie msg('checkpoint-badver', $dir);
    }
    if ($res !~ /^pg_control.+\d+/) {
        ndie msg('checkpoint-badver2');
    }

    ## return the pg_controldata output
    return $res;
}
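
## Illustrative note: the lookup above lets the binary be overridden without
## changing PATH (paths below are hypothetical examples):
##   PGCONTROLDATA=/usr/lib/postgresql/9.1/bin/pg_controldata check_postgres.pl --action=checkpoint ...
##   PGBINDIR=/usr/pgsql/bin check_postgres.pl --action=cluster_id ...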
|
|||
|
|
|||
|
|
|||
|
sub check_archive_ready {

    ## Check on the number of WAL archive files with status "ready"
    ## Supports: Nagios, MRTG
    ## Must run as a superuser
    ## Critical and warning are the number of files
    ## Example: --critical=10

    return check_wal_files('/archive_status', '.ready', 10, 15);

} ## end of check_archive_ready
|
|||
|
|
|||
|
|
|||
|
sub check_autovac_freeze {
|
|||
|
|
|||
|
## Check how close all databases are to autovacuum_freeze_max_age
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## It makes no sense to run this more than once on the same cluster
|
|||
|
## Warning and criticals are percentages
|
|||
|
## Can also ignore databases with exclude, and limit with include
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'percent',
|
|||
|
default_warning => '90%',
|
|||
|
default_critical => '95%',
|
|||
|
forcemrtg => 1,
|
|||
|
});
|
|||
|
|
|||
|
(my $w = $warning) =~ s/\D//;
|
|||
|
(my $c = $critical) =~ s/\D//;
|
|||
|
|
|||
|
my $SQL = q{SELECT freez, txns, ROUND(100*(txns/freez::float)) AS perc, datname}.
|
|||
|
q{ FROM (SELECT foo.freez::int, age(datfrozenxid) AS txns, datname}.
|
|||
|
q{ FROM pg_database d JOIN (SELECT setting AS freez FROM pg_settings WHERE name = 'autovacuum_freeze_max_age') AS foo}.
|
|||
|
q{ ON (true) WHERE d.datallowconn) AS foo2 ORDER BY 3 DESC, 4 ASC};
|
|||
|
|
|||
|
my $info = run_command($SQL, {regex => qr{\w+} } );
|
|||
|
|
|||
|
$db = $info->{db}[0];
|
|||
|
|
|||
|
my (@crit,@warn,@ok);
|
|||
|
my ($maxp,$maxt,$maxdb) = (0,0,''); ## used by MRTG only
|
|||
|
SLURP: for my $r (@{$db->{slurp}}) {
|
|||
|
next SLURP if skip_item($r->{datname});
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
if ($r->{perc} > $maxp) {
|
|||
|
$maxdb = $r->{datname};
|
|||
|
$maxp = $r->{perc};
|
|||
|
}
|
|||
|
elsif ($r->{perc} == $maxp) {
|
|||
|
$maxdb .= sprintf '%s%s', (length $maxdb ? ' | ' : ''), $r->{datname};
|
|||
|
}
|
|||
|
$maxt = $r->{txns} if $r->{txns} > $maxt;
|
|||
|
next SLURP;
|
|||
|
}
|
|||
|
|
|||
|
my $msg = sprintf ' %s=%s%%;%s;%s', perfname($r->{datname}), $r->{perc}, $w, $c;
|
|||
|
$db->{perf} .= " $msg";
|
|||
|
if (length $critical and $r->{perc} >= $c) {
|
|||
|
push @crit => $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $r->{perc} >= $w) {
|
|||
|
push @warn => $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
push @ok => $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $maxp, two => $maxt, msg => $maxdb});
|
|||
|
}
|
|||
|
if (@crit) {
|
|||
|
add_critical join ' ' => @crit;
|
|||
|
}
|
|||
|
elsif (@warn) {
|
|||
|
add_warning join ' ' => @warn;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok join ' ' => @ok;
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_autovac_freeze
|
|||
|
|
|||
|
|
|||
|
sub check_backends {
|
|||
|
|
|||
|
## Check the number of connections
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## It makes no sense to run this more than once on the same cluster
|
|||
|
## Need to be superuser, else only your queries will be visible
|
|||
|
## Warning and criticals can take three forms:
|
|||
|
## critical = 12 -- complain if there are 12 or more connections
|
|||
|
## critical = 95% -- complain if >= 95% of available connections are used
|
|||
|
## critical = -5 -- complain if there are only 5 or fewer connection slots left
|
|||
|
## The former two options only work with simple numbers - no percentage or negative
|
|||
|
## Can also ignore databases with exclude, and limit with include
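    ## Illustrative worked example (hypothetical setting): with max_connections = 100,
    ## the three forms above translate to
    ##   --critical=90    => alarm once 90 or more backends are connected
    ##   --critical=95%   => alarm at 95 connections (95% of 100)
    ##   --critical=-5    => alarm when only 5 slots remain, i.e. also at 95 connections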
|
|||
|
|
|||
|
my $warning = $opt{warning} || '90%';
|
|||
|
my $critical = $opt{critical} || '95%';
|
|||
|
my $noidle = $opt{noidle} || 0;
|
|||
|
|
|||
|
## If only critical was used, remove the default warning
|
|||
|
if ($opt{critical} and !$opt{warning}) {
|
|||
|
$warning = $critical;
|
|||
|
}
|
|||
|
|
|||
|
my $validre = qr{^(\-?)(\d+)(\%?)$};
|
|||
|
if ($critical !~ $validre) {
|
|||
|
ndie msg('backends-users', 'Critical');
|
|||
|
}
|
|||
|
my ($e1,$e2,$e3) = ($1,$2,$3);
|
|||
|
if ($warning !~ $validre) {
|
|||
|
ndie msg('backends-users', 'Warning');
|
|||
|
}
|
|||
|
my ($w1,$w2,$w3) = ($1,$2,$3);
|
|||
|
|
|||
|
## If number is greater, all else is same, and not minus
|
|||
|
if ($w2 > $e2 and $w1 eq $e1 and $w3 eq $e3 and $w1 eq '') {
|
|||
|
ndie msg('range-warnbig');
|
|||
|
}
|
|||
|
## If number is less, all else is same, and minus
|
|||
|
if ($w2 < $e2 and $w1 eq $e1 and $w3 eq $e3 and $w1 eq '-') {
|
|||
|
ndie msg('range-warnsmall');
|
|||
|
}
|
|||
|
if (($w1 and $w3) or ($e1 and $e3)) {
|
|||
|
ndie msg('range-neg-percent');
|
|||
|
}
|
|||
|
|
|||
|
my $MAXSQL = q{SELECT setting AS mc FROM pg_settings WHERE name = 'max_connections'};
|
|||
|
|
|||
|
my $NOIDLE = $noidle ? q{WHERE current_query <> '<IDLE>'} : '';
|
|||
|
$SQL = qq{
|
|||
|
SELECT COUNT(datid) AS current,
|
|||
|
($MAXSQL) AS mc,
|
|||
|
d.datname
|
|||
|
FROM pg_database d
|
|||
|
LEFT JOIN pg_stat_activity s ON (s.datid = d.oid) $NOIDLE
|
|||
|
GROUP BY 2,3
|
|||
|
ORDER BY datname
|
|||
|
};
|
|||
|
my $info = run_command($SQL, {regex => qr{\d+}, fatalregex => 'too many clients' } );
|
|||
|
|
|||
|
$db = $info->{db}[0];
|
|||
|
|
|||
|
## If we cannot connect because of too many clients, we treat as a critical error
|
|||
|
if (exists $info->{fatalregex}) {
|
|||
|
my $regmsg = msg('backends-po');
|
|||
|
my $regmsg2 = msg_en('backends-po');
|
|||
|
if ($info->{fatalregex} =~ /$regmsg/ or $info->{fatalregex} =~ /$regmsg2/) {
|
|||
|
add_critical msg('backends-fatal');
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## There may be no entries returned if we catch pg_stat_activity at the right
|
|||
|
## moment in older versions of Postgres
|
|||
|
if (! defined $db) {
|
|||
|
$info = run_command($MAXSQL, {regex => qr[\d] } );
|
|||
|
$db = $info->{db}[0];
|
|||
|
if (!defined $db->{slurp} or $db->{slurp} !~ /(\d+)/) {
|
|||
|
undef %unknown;
|
|||
|
add_unknown msg('backends-nomax');
|
|||
|
return;
|
|||
|
}
|
|||
|
my $limit = $1;
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => 1, msg => msg('backends-mrtg', $db->{dbname}, $limit)});
|
|||
|
}
|
|||
|
my $percent = (int 1/$limit*100) || 1;
|
|||
|
add_ok msg('backends-msg', 1, $limit, $percent);
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
my $total = 0;
|
|||
|
my $grandtotal = @{$db->{slurp}};
|
|||
|
|
|||
|
## If no max_connections, something is wrong
|
|||
|
if ($db->{slurp}[0]{mc} !~ /\d/) {
|
|||
|
add_unknown msg('backends-nomax');
|
|||
|
return;
|
|||
|
}
|
|||
|
my $limit = $db->{slurp}[0]{mc};
|
|||
|
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
|
|||
|
## Always want perf to show all
|
|||
|
my $nwarn=$w2;
|
|||
|
my $ncrit=$e2;
|
|||
|
if ($e1) {
|
|||
|
$ncrit = $limit-$e2;
|
|||
|
}
|
|||
|
elsif ($e3) {
|
|||
|
$ncrit = (int $e2*$limit/100);
|
|||
|
}
|
|||
|
if ($w1) {
|
|||
|
$nwarn = $limit-$w2;
|
|||
|
}
|
|||
|
elsif ($w3) {
|
|||
|
$nwarn = (int $w2*$limit/100)
|
|||
|
}
|
|||
|
|
|||
|
if (! skip_item($r->{datname})) {
|
|||
|
$db->{perf} .= sprintf ' %s=%s;%s;%s;0;%s',
|
|||
|
perfname($r->{datname}), $r->{current}, $nwarn, $ncrit, $limit;
|
|||
|
$total += $r->{current};
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $total, msg => msg('backends-mrtg', $db->{dbname}, $limit)});
|
|||
|
}
|
|||
|
|
|||
|
if (!$total) {
|
|||
|
if ($grandtotal) {
|
|||
|
## We assume that exclude/include rules are correct, and we simply had no entries
|
|||
|
## at all in the specific databases we wanted
|
|||
|
add_ok msg('backends-oknone');
|
|||
|
}
|
|||
|
else {
|
|||
|
add_unknown msg('no-match-db');
|
|||
|
}
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
my $percent = (int $total / $limit*100) || 1;
|
|||
|
my $msg = msg('backends-msg', $total, $limit, $percent);
|
|||
|
my $ok = 1;
|
|||
|
|
|||
|
if ($e1) { ## minus
|
|||
|
$ok = 0 if $limit-$total <= $e2;
|
|||
|
}
|
|||
|
elsif ($e3) { ## percent
|
|||
|
my $nowpercent = $total/$limit*100;
|
|||
|
$ok = 0 if $nowpercent >= $e2;
|
|||
|
}
|
|||
|
else { ## raw number
|
|||
|
$ok = 0 if $total >= $e2;
|
|||
|
}
|
|||
|
if (!$ok) {
|
|||
|
add_critical $msg;
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
if ($w1) {
|
|||
|
$ok = 0 if $limit-$total <= $w2;
|
|||
|
}
|
|||
|
elsif ($w3) {
|
|||
|
my $nowpercent = $total/$limit*100;
|
|||
|
$ok = 0 if $nowpercent >= $w2;
|
|||
|
}
|
|||
|
else {
|
|||
|
$ok = 0 if $total >= $w2;
|
|||
|
}
|
|||
|
if (!$ok) {
|
|||
|
add_warning $msg;
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
add_ok $msg;
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_backends
|
|||
|
|
|||
|
|
|||
|
sub check_bloat {
|
|||
|
|
|||
|
## Check how bloated the tables and indexes are
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## NOTE! This check depends on ANALYZE being run regularly
|
|||
|
## Also requires stats collection to be on
|
|||
|
## This action may be very slow on large databases
|
|||
|
## By default, checks all relations
|
|||
|
## Can check specific one(s) with include; can ignore some with exclude
|
|||
|
## Begin name with a '~' to make it a regular expression
|
|||
|
## Warning and critical are in sizes, defaults to bytes
|
|||
|
## Valid units: b, k, m, g, t, e
|
|||
|
## All above may be written as plural or with a trailing 'b'
|
|||
|
## Example: --critical="25 GB" --include="mylargetable"
|
|||
|
## Can also specify percentages
|
|||
|
|
|||
|
## Don't bother with tables or indexes unless they have at least this many bloated pages
|
|||
|
my $MINPAGES = 0;
|
|||
|
my $MINIPAGES = 10;
|
|||
|
|
|||
|
my $LIMIT = 10;
|
|||
|
if ($opt{perflimit}) {
|
|||
|
$LIMIT = $opt{perflimit};
|
|||
|
}
|
|||
|
|
|||
|
my ($warning, $critical) = validate_size_or_percent_with_oper
|
|||
|
({
|
|||
|
default_warning => '1 GB',
|
|||
|
default_critical => '5 GB',
|
|||
|
});
|
|||
|
|
|||
|
## This was fun to write
|
|||
|
$SQL = q{
|
|||
|
SELECT
|
|||
|
current_database() AS db, schemaname, tablename, reltuples::bigint AS tups, relpages::bigint AS pages, otta,
|
|||
|
ROUND(CASE WHEN otta=0 OR sml.relpages=0 OR sml.relpages=otta THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS tbloat,
|
|||
|
CASE WHEN relpages < otta THEN 0 ELSE relpages::bigint - otta END AS wastedpages,
|
|||
|
CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END AS wastedbytes,
|
|||
|
CASE WHEN relpages < otta THEN '0 bytes'::text ELSE (bs*(relpages-otta))::bigint || ' bytes' END AS wastedsize,
|
|||
|
iname, ituples::bigint AS itups, ipages::bigint AS ipages, iotta,
|
|||
|
ROUND(CASE WHEN iotta=0 OR ipages=0 OR ipages=iotta THEN 0.0 ELSE ipages/iotta::numeric END,1) AS ibloat,
|
|||
|
CASE WHEN ipages < iotta THEN 0 ELSE ipages::bigint - iotta END AS wastedipages,
|
|||
|
CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes,
|
|||
|
CASE WHEN ipages < iotta THEN '0 bytes' ELSE (bs*(ipages-iotta))::bigint || ' bytes' END AS wastedisize,
|
|||
|
CASE WHEN relpages < otta THEN
|
|||
|
CASE WHEN ipages < iotta THEN 0 ELSE ipages-iotta::bigint END
|
|||
|
ELSE CASE WHEN ipages < iotta THEN relpages-otta::bigint
|
|||
|
ELSE relpages-otta::bigint + ipages-iotta::bigint END
|
|||
|
END AS totalwastedbytes
|
|||
|
FROM (
|
|||
|
SELECT
|
|||
|
schemaname, tablename, cc.reltuples, cc.relpages, bs,
|
|||
|
CEIL((cc.reltuples*((datahdr+ma-
|
|||
|
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta,
|
|||
|
COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
|
|||
|
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
|
|||
|
FROM (
|
|||
|
SELECT
|
|||
|
ma,bs,schemaname,tablename,
|
|||
|
(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
|
|||
|
(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
|
|||
|
FROM (
|
|||
|
SELECT
|
|||
|
schemaname, tablename, hdr, ma, bs,
|
|||
|
SUM((1-null_frac)*avg_width) AS datawidth,
|
|||
|
MAX(null_frac) AS maxfracsum,
|
|||
|
hdr+(
|
|||
|
SELECT 1+count(*)/8
|
|||
|
FROM pg_stats s2
|
|||
|
WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
|
|||
|
) AS nullhdr
|
|||
|
FROM pg_stats s, (
|
|||
|
SELECT
|
|||
|
BLOCK_SIZE,
|
|||
|
CASE WHEN SUBSTRING(SPLIT_PART(v, ' ', 2) FROM '#"[0-9]+.[0-9]+#"%' for '#')
|
|||
|
IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
|
|||
|
CASE WHEN v ~ 'mingw32' OR v ~ '64-bit' THEN 8 ELSE 4 END AS ma
|
|||
|
FROM (SELECT version() AS v) AS foo
|
|||
|
) AS constants
|
|||
|
GROUP BY 1,2,3,4,5
|
|||
|
) AS foo
|
|||
|
) AS rs
|
|||
|
JOIN pg_class cc ON cc.relname = rs.tablename
|
|||
|
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
|
|||
|
LEFT JOIN pg_index i ON indrelid = cc.oid
|
|||
|
LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
|
|||
|
) AS sml
|
|||
|
};
|
|||
|
|
|||
|
if (! defined $opt{include} and ! defined $opt{exclude}) {
|
|||
|
$SQL .= " WHERE sml.relpages - otta > $MINPAGES OR ipages - iotta > $MINIPAGES";
|
|||
|
$SQL .= " ORDER BY totalwastedbytes DESC LIMIT $LIMIT";
|
|||
|
}
|
|||
|
else {
|
|||
|
$SQL .= ' ORDER BY totalwastedbytes DESC';
|
|||
|
}
|
|||
|
|
|||
|
if ($psql_version <= 7.4) {
|
|||
|
$SQL =~ s/BLOCK_SIZE/(SELECT 8192) AS bs/;
|
|||
|
}
|
|||
|
else {
|
|||
|
$SQL =~ s/BLOCK_SIZE/(SELECT current_setting('block_size')::numeric) AS bs/;
|
|||
|
}
|
|||
|
|
|||
|
my $info = run_command($SQL);
|
|||
|
|
|||
|
if (defined $info->{db}[0] and exists $info->{db}[0]{error}) {
|
|||
|
ndie $info->{db}[0]{error};
|
|||
|
}
|
|||
|
|
|||
|
my %seenit;
|
|||
|
|
|||
|
## Store the perf data for sorting at the end
|
|||
|
my %perf;
|
|||
|
|
|||
|
$db = $info->{db}[0];
|
|||
|
|
|||
|
if ($db->{slurp} !~ /\w+/o) {
|
|||
|
add_ok msg('bloat-nomin') unless $MRTG;
|
|||
|
return;
|
|||
|
}
|
|||
|
## Not a 'regex' to run_command as we need to check the above first.
|
|||
|
if ($db->{slurp} !~ /\d+/) {
|
|||
|
add_unknown msg('invalid-query', $db->{slurp}) unless $MRTG;
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
my $max = -1;
|
|||
|
my $maxmsg = '?';
|
|||
|
|
|||
|
## The perf must be added before the add_x, so we defer the settings:
|
|||
|
my (@addwarn, @addcrit);
|
|||
|
|
|||
|
for my $r (@{ $db->{slurp} }) {
|
|||
|
|
|||
|
for my $v (values %$r) {
|
|||
|
$v =~ s/(\d+) bytes/pretty_size($1,1)/ge;
|
|||
|
}
|
|||
|
|
|||
|
my ($dbname,$schema,$table,$tups,$pages,$otta,$bloat,$wp,$wb,$ws) = @$r{
|
|||
|
qw/ db schemaname tablename tups pages otta tbloat wastedpages wastedbytes wastedsize/};
|
|||
|
|
|||
|
next if skip_item($table, $schema);
|
|||
|
|
|||
|
        my ($index,$irows,$ipages,$iotta,$ibloat,$iwp,$iwb,$iws) = @$r{
            qw/ iname itups ipages iotta ibloat wastedipages wastedibytes wastedisize/};
|
|||
|
|
|||
|
## Made it past the exclusions
|
|||
|
$max = -2 if $max == -1;
|
|||
|
|
|||
|
## Do the table first if we haven't seen it
|
|||
|
if (! $seenit{"$dbname.$schema.$table"}++) {
|
|||
|
my $nicename = perfname("$schema.$table");
|
|||
|
$perf{$wb}{$nicename}++;
|
|||
|
my $msg = msg('bloat-table', $dbname, $schema, $table, $tups, $pages, $otta, $bloat, $wb, $ws);
|
|||
|
my $ok = 1;
|
|||
|
my $perbloat = $bloat * 100;
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
$stats{table}{"DB=$dbname TABLE=$schema.$table"} = [$wb, $bloat];
|
|||
|
next;
|
|||
|
}
|
|||
|
if ($critical->($wb, $perbloat)) {
|
|||
|
push @addcrit => $msg;
|
|||
|
$ok = 0;
|
|||
|
}
|
|||
|
|
|||
|
if ($ok and $warning->($wb, $perbloat)) {
|
|||
|
push @addwarn => $msg;
|
|||
|
$ok = 0;
|
|||
|
}
|
|||
|
($max = $wb, $maxmsg = $msg) if $wb > $max and $ok;
|
|||
|
}
|
|||
|
|
|||
|
## Now the index, if it exists
|
|||
|
if ($index ne '?') {
|
|||
|
my $nicename = perfname($index);
|
|||
|
$perf{$iwb}{$nicename}++;
|
|||
|
my $msg = msg('bloat-index', $dbname, $index, $irows, $ipages, $iotta, $ibloat, $iwb, $iws);
|
|||
|
my $ok = 1;
|
|||
|
my $iperbloat = $ibloat * 100;
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
$stats{index}{"DB=$dbname INDEX=$index"} = [$iwb, $ibloat];
|
|||
|
next;
|
|||
|
}
|
|||
|
if ($critical->($iwb, $iperbloat)) {
|
|||
|
push @addcrit => $msg;
|
|||
|
$ok = 0;
|
|||
|
}
|
|||
|
|
|||
|
if ($ok and $warning->($iwb, $iperbloat)) {
|
|||
|
push @addwarn => $msg;
|
|||
|
$ok = 0;
|
|||
|
}
|
|||
|
($max = $iwb, $maxmsg = $msg) if $iwb > $max and $ok;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Set a sorted limited perf
|
|||
|
$db->{perf} = '';
|
|||
|
my $count = 0;
|
|||
|
PERF: for my $size (sort {$b <=> $a } keys %perf) {
|
|||
|
for my $name (sort keys %{ $perf{$size} }) {
|
|||
|
$db->{perf} .= "$name=${size}B ";
|
|||
|
last PERF if $opt{perflimit} and ++$count >= $opt{perflimit};
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Now we can set the critical and warning
|
|||
|
for (@addcrit) {
|
|||
|
add_critical $_;
|
|||
|
$db->{perf} = '';
|
|||
|
}
|
|||
|
for (@addwarn) {
|
|||
|
add_warning $_;
|
|||
|
$db->{perf} = '';
|
|||
|
}
|
|||
|
|
|||
|
if ($max == -1) {
|
|||
|
add_unknown msg('no-match-rel');
|
|||
|
}
|
|||
|
elsif ($max != -1) {
|
|||
|
add_ok $maxmsg;
|
|||
|
}
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
keys %stats or bad_mrtg(msg('unknown-error'));
|
|||
|
## We are going to report the highest wasted bytes for table and index
|
|||
|
        my ($one,$two,$msg) = ('','','');
|
|||
|
## Can also sort by ratio
|
|||
|
my $sortby = exists $opt{mrtg} and $opt{mrtg} eq 'ratio' ? 1 : 0;
|
|||
|
for (sort { $stats{table}{$b}->[$sortby] <=> $stats{table}{$a}->[$sortby] } keys %{$stats{table}}) {
|
|||
|
$one = $stats{table}{$_}->[$sortby];
|
|||
|
$msg = $_;
|
|||
|
last;
|
|||
|
}
|
|||
|
for (sort { $stats{index}{$b}->[$sortby] <=> $stats{index}{$a}->[$sortby] } keys %{$stats{index}}) {
|
|||
|
$two = $stats{index}{$_}->[$sortby];
|
|||
|
$msg .= " $_";
|
|||
|
last;
|
|||
|
}
|
|||
|
do_mrtg({one => $one, two => $two, msg => $msg});
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_bloat
|
|||
|
|
|||
|
sub check_checkpoint {
|
|||
|
|
|||
|
## Checks how long in seconds since the last checkpoint on a WAL slave
|
|||
|
|
|||
|
    ## Note that this value is actually the last checkpoint on the
    ## *master* (as copied from the WAL checkpoint record), so it is more
    ## indicative that the master has been unable to complete a
    ## checkpoint for some other reason (e.g., unable to write dirty
    ## buffers, archive_command failure, etc). As such, this check
    ## may make more sense on the master, or we may want to look at
    ## the WAL segments received/processed instead of the checkpoint
    ## timestamp.
    ## This check can use the optional --assume-standby-mode or
    ## --assume-prod: if the mode found is not the mode assumed, a
    ## CRITICAL is emitted.
|
|||
|
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Warning and critical are seconds
|
|||
|
## Requires $ENV{PGDATA} or --datadir
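    ## Illustrative invocation (the data directory path is a hypothetical example):
    ##   check_postgres.pl --action=checkpoint --datadir=/var/lib/postgresql/9.1/main \
    ##       --warning='5 min' --critical='15 min' --assume-standby-mode
    ## Time units follow size_in_seconds(), so '5 min' and '300' are equivalent.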
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'time',
|
|||
|
leastone => 1,
|
|||
|
forcemrtg => 1,
|
|||
|
});
|
|||
|
|
|||
|
$db->{host} = '<none>';
|
|||
|
|
|||
|
## Run pg_controldata, grab the time
|
|||
|
$res = open_controldata();
|
|||
|
|
|||
|
my $regex = msg('checkpoint-po');
|
|||
|
if ($res !~ /$regex\s*(.+)/) { ## no critic (ProhibitUnusedCapture)
|
|||
|
## Just in case, check the English one as well
|
|||
|
$regex = msg_en('checkpoint-po');
|
|||
|
if ($res !~ /$regex\s*(.+)/) {
|
|||
|
ndie msg('checkpoint-noregex');
|
|||
|
}
|
|||
|
}
|
|||
|
my $last = $1;
|
|||
|
|
|||
|
## Convert to number of seconds
|
|||
|
eval {
|
|||
|
require Date::Parse;
|
|||
|
import Date::Parse;
|
|||
|
};
|
|||
|
if ($@) {
|
|||
|
ndie msg('checkpoint-nodp');
|
|||
|
}
|
|||
|
my $dt = str2time($last);
|
|||
|
if ($dt !~ /^\d+$/) {
|
|||
|
ndie msg('checkpoint-noparse', $last);
|
|||
|
}
|
|||
|
my $diff = time - $dt;
|
|||
|
my $msg = $diff==1 ? msg('checkpoint-ok') : msg('checkpoint-ok2', $diff);
|
|||
|
$db->{perf} = sprintf '%s=%s;%s;%s',
|
|||
|
perfname(msg('age')), $diff, $warning, $critical;
|
|||
|
|
|||
|
my $mode = '';
|
|||
|
if ($STANDBY) {
|
|||
|
$mode = 'STANDBY';
|
|||
|
}
|
|||
|
if ($MASTER) {
|
|||
|
$mode = 'MASTER';
|
|||
|
}
|
|||
|
|
|||
|
## If we have an assume flag, then honor it.
|
|||
|
my $goodmode = 1;
|
|||
|
if ($opt{'assume-standby-mode'} and not $STANDBY) {
|
|||
|
$goodmode = 0;
|
|||
|
$mode = 'NOT STANDBY';
|
|||
|
}
|
|||
|
elsif ($opt{'assume-prod'} and not $MASTER) {
|
|||
|
$goodmode = 0;
|
|||
|
$mode = 'NOT MASTER';
|
|||
|
}
|
|||
|
|
|||
|
if (length($mode) > 0) {
|
|||
|
$db->{perf} .= sprintf ' %s=%s',
|
|||
|
perfname(msg('mode')), $mode;
|
|||
|
}
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $diff, msg => $msg});
|
|||
|
}
|
|||
|
|
|||
|
if ((length $critical and $diff >= $critical) or not $goodmode) {
|
|||
|
add_critical $msg;
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
if (length $warning and $diff >= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
add_ok $msg;
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_checkpoint
|
|||
|
|
|||
|
|
|||
|
sub check_cluster_id {
|
|||
|
|
|||
|
|
|||
|
## Verify the Database System Identifier provided by pg_controldata
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## One of warning or critical must be given (but not both)
|
|||
|
## It should run one time to find out the expected cluster-id
|
|||
|
## You can use --critical="0" to find out the current cluster-id
|
|||
|
## You can include or exclude settings as well
|
|||
|
## Example:
|
|||
|
## check_postgres_cluster_id --critical="5633695740047915125"
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'integer_string', onlyone => 1});
|
|||
|
|
|||
|
$db->{host} = '<none>';
|
|||
|
|
|||
|
## Run pg_controldata, grab the cluster-id
|
|||
|
$res = open_controldata();
|
|||
|
|
|||
|
my $regex = msg('checkcluster-id');
|
|||
|
if ($res !~ /$regex\s*(.+)/) { ## no critic (ProhibitUnusedCapture)
|
|||
|
## Just in case, check the English one as well
|
|||
|
$regex = msg_en('checkcluster-id');
|
|||
|
if ($res !~ /$regex\s*(.+)/) {
|
|||
|
ndie msg('checkpoint-noregex');
|
|||
|
}
|
|||
|
}
|
|||
|
my $ident = $1;
|
|||
|
|
|||
|
my $msg = msg('checkcluster-msg', $ident);
|
|||
|
if ($MRTG) {
|
|||
|
$opt{mrtg} or ndie msg('checksum-nomrtg');
|
|||
|
do_mrtg({one => $opt{mrtg} eq $ident ? 1 : 0, msg => $ident});
|
|||
|
}
|
|||
|
if ($critical and $critical ne $ident) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif ($warning and $warning ne $ident) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
elsif (!$critical and !$warning) {
|
|||
|
add_unknown $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_cluster_id
|
|||
|
|
|||
|
|
|||
|
sub check_commitratio {
|
|||
|
|
|||
|
## Check the commitratio of one or more databases
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## mrtg reports the largest two databases
|
|||
|
## By default, checks all databases
|
|||
|
## Can check specific one(s) with include
|
|||
|
## Can ignore some with exclude
|
|||
|
## Warning and criticals are percentages
|
|||
|
## Limit to a specific user (db owner) with the includeuser option
|
|||
|
## Exclude users with the excludeuser option
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'percent'});
|
|||
|
|
|||
|
$SQL = qq{
|
|||
|
SELECT
|
|||
|
round(100.*sd.xact_commit/(sd.xact_commit+sd.xact_rollback), 2) AS dcommitratio,
|
|||
|
d.datname,
|
|||
|
u.usename
|
|||
|
FROM pg_stat_database sd
|
|||
|
JOIN pg_database d ON (d.oid=sd.datid)
|
|||
|
JOIN pg_user u ON (u.usesysid=d.datdba)
|
|||
|
WHERE sd.xact_commit+sd.xact_rollback<>0
|
|||
|
$USERWHERECLAUSE
|
|||
|
};
|
|||
|
if ($opt{perflimit}) {
|
|||
|
$SQL .= " ORDER BY 1 DESC LIMIT $opt{perflimit}";
|
|||
|
}
|
|||
|
|
|||
|
my $info = run_command($SQL, { regex => qr{\d+}, emptyok => 1, } );
|
|||
|
my $found = 0;
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my $min = 101;
|
|||
|
$found = 1;
|
|||
|
my %s;
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
|
|||
|
next if skip_item($r->{datname});
|
|||
|
|
|||
|
if ($r->{dcommitratio} <= $min) {
|
|||
|
$min = $r->{dcommitratio};
|
|||
|
}
|
|||
|
$s{$r->{datname}} = $r->{dcommitratio};
|
|||
|
}
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $min, msg => "DB: $db->{dbname}"});
|
|||
|
}
|
|||
|
if ($min > 100) {
|
|||
|
$stats{$db->{dbname}} = 0;
|
|||
|
if ($USERWHERECLAUSE) {
|
|||
|
add_ok msg('no-match-user');
|
|||
|
}
|
|||
|
else {
|
|||
|
add_unknown msg('no-match-db');
|
|||
|
}
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
my $msg = '';
|
|||
|
for (reverse sort {$s{$b} <=> $s{$a} or $a cmp $b } keys %s) {
|
|||
|
$msg .= "$_: $s{$_} ";
|
|||
|
$db->{perf} .= sprintf ' %s=%s;%s;%s',
|
|||
|
perfname($_), $s{$_}, $warning, $critical;
|
|||
|
}
|
|||
|
if (length $critical and $min <= $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $min <= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## If no results, probably a version problem
|
|||
|
if (!$found and keys %unknown) {
|
|||
|
(my $first) = values %unknown;
|
|||
|
if ($first->[0][0] =~ /pg_database_size/) {
|
|||
|
ndie msg('dbsize-version');
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_commitratio
|
|||
|
|
|||
|
|
|||
|
sub check_connection {
|
|||
|
|
|||
|
## Check the connection, get the connection time and version
|
|||
|
## No comparisons made: warning and critical are not allowed
|
|||
|
    ## Supports: Nagios, MRTG
|
|||
|
|
|||
|
if ($opt{warning} or $opt{critical}) {
|
|||
|
ndie msg('range-none');
|
|||
|
}
|
|||
|
|
|||
|
my $info = run_command('SELECT version() AS v');
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
|
|||
|
my $err = $db->{error} || '';
|
|||
|
if ($err =~ /FATAL/) {
|
|||
|
$MRTG and do_mrtg({one => 0});
|
|||
|
add_critical $db->{error};
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
my $ver = ($db->{slurp}[0]{v} =~ /(\d+\.\d+\S+)/o) ? $1 : '';
|
|||
|
|
|||
|
$MRTG and do_mrtg({one => $ver ? 1 : 0});
|
|||
|
|
|||
|
if ($ver) {
|
|||
|
add_ok msg('version', $ver);
|
|||
|
}
|
|||
|
else {
|
|||
|
add_unknown msg('invalid-query', $db->{slurp}[0]{v});
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_connection
|
|||
|
|
|||
|
|
|||
|
sub check_custom_query {
|
|||
|
|
|||
|
## Run a user-supplied query, then parse the results
|
|||
|
## If you end up using this to make a useful query, consider making it
|
|||
|
## into a specific action and sending in a patch!
|
|||
|
## valtype must be one of: string, time, size, integer
|
|||
|
|
|||
|
my $valtype = $opt{valtype} || 'integer';
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => $valtype, leastone => 1});
|
|||
|
|
|||
|
my $query = $opt{query} or ndie msg('custom-nostring');
|
|||
|
|
|||
|
my $reverse = $opt{reverse} || 0;
|
|||
|
|
|||
|
my $info = run_command($query);
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
|
|||
|
if (! @{$db->{slurp}}) {
|
|||
|
add_unknown msg('custom-norows');
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
my $goodrow = 0;
|
|||
|
|
|||
|
## The other column tells it the name to use as the perfdata value
|
|||
|
my $perfname;
|
|||
|
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
my $result = $r->{result};
|
|||
|
if (! defined $perfname) {
|
|||
|
$perfname = '';
|
|||
|
for my $name (keys %$r) {
|
|||
|
next if $name eq 'result';
|
|||
|
$perfname = $name;
|
|||
|
last;
|
|||
|
}
|
|||
|
}
|
|||
|
$goodrow++;
|
|||
|
if ($perfname) {
|
|||
|
$db->{perf} .= sprintf ' %s=%s;%s;%s',
|
|||
|
perfname($perfname), $r->{$perfname}, $warning, $critical;
|
|||
|
}
|
|||
|
my $gotmatch = 0;
|
|||
|
if (! defined $result) {
|
|||
|
add_unknown msg('custom-invalid');
|
|||
|
return;
|
|||
|
}
|
|||
|
if (length $critical) {
|
|||
|
if (($valtype eq 'string' and $result eq $critical)
|
|||
|
or
|
|||
|
($valtype ne 'string' and $reverse ? $result <= $critical : $result >= $critical)) { ## covers integer, time, size
|
|||
|
add_critical "$result";
|
|||
|
$gotmatch = 1;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (length $warning and ! $gotmatch) {
|
|||
|
if (($valtype eq 'string' and $result eq $warning)
|
|||
|
or
|
|||
|
($valtype ne 'string' and length $result and $reverse ? $result <= $warning : $result >= $warning)) {
|
|||
|
add_warning "$result";
|
|||
|
$gotmatch = 1;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (! $gotmatch) {
|
|||
|
add_ok "$result";
|
|||
|
}
|
|||
|
|
|||
|
} ## end each row returned
|
|||
|
|
|||
|
if (!$goodrow) {
|
|||
|
add_unknown msg('custom-invalid');
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_custom_query
|
|||
|
|
|||
|
|
|||
|
sub check_database_size {
|
|||
|
|
|||
|
## Check the size of one or more databases
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## mrtg reports the largest two databases
|
|||
|
## By default, checks all databases
|
|||
|
## Can check specific one(s) with include
|
|||
|
## Can ignore some with exclude
|
|||
|
## Warning and critical are bytes
|
|||
|
## Valid units: b, k, m, g, t, e
|
|||
|
## All above may be written as plural or with a trailing 'b'
|
|||
|
## Limit to a specific user (db owner) with the includeuser option
|
|||
|
## Exclude users with the excludeuser option
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'size'});
|
|||
|
|
|||
|
$USERWHERECLAUSE =~ s/AND/WHERE/;
|
|||
|
|
|||
|
$SQL = qq{
|
|||
|
SELECT pg_database_size(d.oid) AS dsize,
|
|||
|
pg_size_pretty(pg_database_size(d.oid)) AS pdsize,
|
|||
|
datname,
|
|||
|
usename
|
|||
|
FROM pg_database d
|
|||
|
LEFT JOIN pg_user u ON (u.usesysid=d.datdba)$USERWHERECLAUSE
|
|||
|
};
|
|||
|
if ($opt{perflimit}) {
|
|||
|
$SQL .= " ORDER BY 1 DESC LIMIT $opt{perflimit}";
|
|||
|
}
|
|||
|
|
|||
|
my $info = run_command($SQL, { regex => qr{\d+}, emptyok => 1, } );
|
|||
|
my $found = 0;
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my $max = -1;
|
|||
|
$found = 1;
|
|||
|
my %s;
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
|
|||
|
next if skip_item($r->{datname});
|
|||
|
|
|||
|
if ($r->{dsize} >= $max) {
|
|||
|
$max = $r->{dsize};
|
|||
|
}
|
|||
|
$s{$r->{datname}} = [$r->{dsize},$r->{pdsize}];
|
|||
|
}
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $max, msg => "DB: $db->{dbname}"});
|
|||
|
}
|
|||
|
if ($max < 0) {
|
|||
|
$stats{$db->{dbname}} = 0;
|
|||
|
if ($USERWHERECLAUSE) {
|
|||
|
add_ok msg('no-match-user');
|
|||
|
}
|
|||
|
else {
|
|||
|
add_unknown msg('no-match-db');
|
|||
|
}
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
my $msg = '';
|
|||
|
for (sort {$s{$b}[0] <=> $s{$a}[0] or $a cmp $b } keys %s) {
|
|||
|
$msg .= "$_: $s{$_}[0] ($s{$_}[1]) ";
|
|||
|
$db->{perf} .= sprintf ' %s=%s;%s;%s',
|
|||
|
perfname($_), $s{$_}[0], $warning, $critical;
|
|||
|
}
|
|||
|
if (length $critical and $max >= $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $max >= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## If no results, probably a version problem
|
|||
|
if (!$found and keys %unknown) {
|
|||
|
(my $first) = values %unknown;
|
|||
|
if ($first->[0][0] =~ /pg_database_size/) {
|
|||
|
ndie msg('dbsize-version');
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_database_size
|
|||
|
|
|||
|
|
|||
|
sub check_dbstats {
|
|||
|
|
|||
|
## Returns values from the pg_stat_database view
|
|||
|
## Supports: Cacti
|
|||
|
## Assumes psql and target are the same version for the 8.3 check
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'cacti',
|
|||
|
});
|
|||
|
|
|||
|
my $SQL = q{SELECT datname,
|
|||
|
numbackends AS backends,xact_commit AS commits,xact_rollback AS rollbacks,
|
|||
|
blks_read AS read, blks_hit AS hit};
|
|||
|
if ($opt{dbname}) {
|
|||
|
$SQL .= q{
|
|||
|
,(SELECT SUM(idx_scan) FROM pg_stat_user_indexes) AS idxscan
|
|||
|
,COALESCE((SELECT SUM(idx_tup_read) FROM pg_stat_user_indexes),0) AS idxtupread
|
|||
|
,COALESCE((SELECT SUM(idx_tup_fetch) FROM pg_stat_user_indexes),0) AS idxtupfetch
|
|||
|
,COALESCE((SELECT SUM(idx_blks_read) FROM pg_statio_user_indexes),0) AS idxblksread
|
|||
|
,COALESCE((SELECT SUM(idx_blks_hit) FROM pg_statio_user_indexes),0) AS idxblkshit
|
|||
|
,COALESCE((SELECT SUM(seq_scan) FROM pg_stat_user_tables),0) AS seqscan
|
|||
|
,COALESCE((SELECT SUM(seq_tup_read) FROM pg_stat_user_tables),0) AS seqtupread
|
|||
|
};
|
|||
|
}
|
|||
|
$SQL .= q{ FROM pg_stat_database};
|
|||
|
(my $SQL2 = $SQL) =~ s/AS seqtupread/AS seqtupread, tup_returned AS ret, tup_fetched AS fetch, tup_inserted AS ins, tup_updated AS upd, tup_deleted AS del/;
|
|||
|
|
|||
|
my $info = run_command($SQL, {regex => qr{\w}, version => [ ">8.2 $SQL2" ] } );
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
ROW: for my $r (@{$db->{slurp}}) {
|
|||
|
|
|||
|
my $dbname = $r->{datname};
|
|||
|
|
|||
|
next ROW if skip_item($dbname);
|
|||
|
|
|||
|
            ## If dbnames were specified, use those for filtering as well
|
|||
|
if (@{$opt{dbname}}) {
|
|||
|
my $keepit = 0;
|
|||
|
for my $drow (@{$opt{dbname}}) {
|
|||
|
for my $d (split /,/ => $drow) {
|
|||
|
$d eq $dbname and $keepit = 1;
|
|||
|
}
|
|||
|
}
|
|||
|
next ROW unless $keepit;
|
|||
|
}
|
|||
|
|
|||
|
my $msg = '';
|
|||
|
for my $col (qw/
|
|||
|
backends commits rollbacks
|
|||
|
read hit
|
|||
|
idxscan idxtupread idxtupfetch idxblksread idxblkshit
|
|||
|
seqscan seqtupread
|
|||
|
ret fetch ins upd del/) {
|
|||
|
$msg .= "$col:";
|
|||
|
$msg .= (exists $r->{$col} and length $r->{$col}) ? $r->{$col} : 0;
|
|||
|
$msg .= ' ';
|
|||
|
}
|
|||
|
print "${msg}dbname:$dbname\n";
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
exit 0;
|
|||
|
|
|||
|
} ## end of check_dbstats
|
|||
|
|
|||
|
|
|||
|
sub check_disabled_triggers {
|
|||
|
|
|||
|
## Checks how many disabled triggers are in the database
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Warning and critical are integers, defaults to 1
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'positive integer',
|
|||
|
default_warning => 1,
|
|||
|
default_critical => 1,
|
|||
|
forcemrtg => 1,
|
|||
|
});
|
|||
|
|
|||
|
$SQL = q{
|
|||
|
SELECT tgrelid::regclass AS tname, tgname, tgenabled
|
|||
|
FROM pg_trigger
|
|||
|
WHERE tgenabled IS NOT TRUE ORDER BY tgname
|
|||
|
};
|
|||
|
my $SQL83 = q{
|
|||
|
SELECT tgrelid::regclass AS tname, tgname, tgenabled
|
|||
|
FROM pg_trigger
|
|||
|
WHERE tgenabled = 'D' ORDER BY tgname
|
|||
|
};
|
|||
|
my $SQLOLD = q{SELECT 'FAIL' AS fail};
|
|||
|
|
|||
|
my $info = run_command($SQL, { version => [ ">8.2 $SQL83", "<8.1 $SQLOLD" ] } );
|
|||
|
|
|||
|
if (exists $info->{db}[0]{fail}) {
|
|||
|
ndie msg('die-action-version', $action, '8.1', $db->{version});
|
|||
|
}
|
|||
|
|
|||
|
my $count = 0;
|
|||
|
my $dislis = '';
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
|
|||
|
ROW: for my $r (@{$db->{slurp}}) {
|
|||
|
$count++;
|
|||
|
$dislis .= " $r->{tname}=>$r->{tgname}";
|
|||
|
}
|
|||
|
$MRTG and do_mrtg({one => $count});
|
|||
|
|
|||
|
my $msg = msg('trigger-msg', "$count$dislis");
|
|||
|
|
|||
|
if ($critical and $count >= $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif ($warning and $count >= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_disabled_triggers
|
|||
|
|
|||
|
|
|||
|
sub check_disk_space {
|
|||
|
|
|||
|
## Check the available disk space used by postgres
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Requires the executable "/bin/df"
|
|||
|
## Must run as a superuser in the database (to examine 'data_directory' setting)
|
|||
|
## Critical and warning are maximum size, or percentages
|
|||
|
## Example: --critical="40 GB"
|
|||
|
## NOTE: Needs to run on the same system (for now)
|
|||
|
## XXX Allow custom ssh commands for remote df and the like
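    ## Illustrative thresholds: because this check uses
    ## validate_size_or_percent_with_oper(), compound limits are accepted, e.g.
    ##   --warning='85% and 20 GB'   (both conditions must be reached)
    ##   --critical='95% or 50 GB'   (either one is enough)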
|
|||
|
|
|||
|
my ($warning, $critical) = validate_size_or_percent_with_oper
|
|||
|
({
|
|||
|
default_warning => '90%',
|
|||
|
default_critical => '95%',
|
|||
|
});
|
|||
|
|
|||
|
-x '/bin/df' or ndie msg('diskspace-nodf');
|
|||
|
|
|||
|
## Figure out where everything is.
|
|||
|
$SQL = q{
|
|||
|
SELECT 'S' AS syn, name AS nn, setting AS val
|
|||
|
FROM pg_settings
|
|||
|
WHERE name = 'data_directory'
|
|||
|
OR name ='log_directory'
|
|||
|
UNION ALL
|
|||
|
SELECT 'T' AS syn, spcname AS nn, spclocation AS val
|
|||
|
FROM pg_tablespace
|
|||
|
WHERE spclocation <> ''
|
|||
|
};
|
|||
|
|
|||
|
my $info = run_command($SQL);
|
|||
|
|
|||
|
my %dir; ## 1 = normal 2 = been checked -1 = does not exist
|
|||
|
my %seenfs;
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my %i;
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
$i{$r->{syn}}{$r->{nn}} = $r->{val};
|
|||
|
}
|
|||
|
if (! exists $i{S}{data_directory}) {
|
|||
|
add_unknown msg('diskspace-nodata');
|
|||
|
next;
|
|||
|
}
|
|||
|
my ($datadir,$logdir) = ($i{S}{data_directory},$i{S}{log_directory}||'');
|
|||
|
|
|||
|
if (!exists $dir{$datadir}) {
|
|||
|
if (! -d $datadir) {
|
|||
|
add_unknown msg('diskspace-nodir', $datadir);
|
|||
|
$dir{$datadir} = -1;
|
|||
|
next;
|
|||
|
}
|
|||
|
$dir{$datadir} = 1;
|
|||
|
|
|||
|
## Check if the WAL files are on a separate disk
|
|||
|
my $xlog = "$datadir/pg_xlog";
|
|||
|
if (-l $xlog) {
|
|||
|
my $linkdir = readlink($xlog);
|
|||
|
$dir{$linkdir} = 1 if ! exists $dir{$linkdir};
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Check log_directory: relative or absolute
|
|||
|
if (length $logdir) {
|
|||
|
if ($logdir =~ /^\w/) { ## relative, check only if symlinked
|
|||
|
$logdir = "$datadir/$logdir";
|
|||
|
if (-l $logdir) {
|
|||
|
my $linkdir = readlink($logdir);
|
|||
|
$dir{$linkdir} = 1 if ! exists $dir{$linkdir};
|
|||
|
}
|
|||
|
}
|
|||
|
else { ## absolute, always check
|
|||
|
if ($logdir ne $datadir and ! exists $dir{$logdir}) {
|
|||
|
$dir{$logdir} = 1;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Check all tablespaces
|
|||
|
for my $tsname (keys %{$i{T}}) {
|
|||
|
my $tsdir = $i{T}{$tsname};
|
|||
|
$dir{$tsdir} = 1 if ! exists $dir{$tsdir};
|
|||
|
}
|
|||
|
|
|||
|
my $gotone = 0;
|
|||
|
for my $dir (keys %dir) {
|
|||
|
next if $dir{$dir} != 1;
|
|||
|
|
|||
|
$dir{$dir} = 1;
|
|||
|
|
|||
|
$COM = qq{/bin/df -kP "$dir" 2>&1};
|
|||
|
$res = qx{$COM};
|
|||
|
|
|||
|
if ($res !~ /^.+\n(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\%\s+(\S+)/) {
|
|||
|
ndie msg('diskspace-fail', $COM, $res);
|
|||
|
}
|
|||
|
my ($fs,$total,$used,$avail,$percent,$mount) = ($1,$2*1024,$3*1024,$4*1024,$5,$6);
|
|||
|
|
|||
|
## If we've already done this one, skip it
|
|||
|
next if $seenfs{$fs}++;
|
|||
|
|
|||
|
next if skip_item($fs);
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
$stats{$fs} = [$total,$used,$avail,$percent];
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
$gotone = 1;
|
|||
|
|
|||
|
## Rather than make another call with -h, do it ourselves
|
|||
|
my $prettyused = pretty_size($used);
|
|||
|
my $prettytotal = pretty_size($total);
|
|||
|
|
|||
|
my $msg = msg('diskspace-msg', $fs, $mount, $prettyused, $prettytotal, $percent);
|
|||
|
|
|||
|
$db->{perf} = sprintf '%s=%sB',
|
|||
|
perfname(msg('size')), $used;
|
|||
|
|
|||
|
my $ok = 1;
|
|||
|
if ($critical->($used, $percent)) {
|
|||
|
add_critical $msg;
|
|||
|
$ok = 0;
|
|||
|
}
|
|||
|
|
|||
|
if ($ok and $warning->($used, $percent)) {
|
|||
|
add_warning $msg;
|
|||
|
$ok = 0;
|
|||
|
}
|
|||
|
|
|||
|
if ($ok) {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
} ## end each dir
|
|||
|
|
|||
|
next if $MRTG;
|
|||
|
|
|||
|
if (!$gotone) {
|
|||
|
add_unknown msg('no-match-fs');
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
keys %stats or bad_mrtg(msg('unknown-error'));
|
|||
|
## Get the highest by total size or percent (total, used, avail, percent)
|
|||
|
## We default to 'available'
|
|||
|
my $sortby = exists $opt{mrtg}
|
|||
|
? $opt{mrtg} eq 'total' ? 0
|
|||
|
: $opt{mrtg} eq 'used' ? 1
|
|||
|
: $opt{mrtg} eq 'avail' ? 2
|
|||
|
: $opt{mrtg} eq 'percent' ? 3 : 2 : 2;
|
|||
|
my ($one,$two,$msg) = ('','','');
|
|||
|
for (sort { $stats{$b}->[$sortby] <=> $stats{$a}->[$sortby] } keys %stats) {
|
|||
|
if ($one eq '') {
|
|||
|
$one = $stats{$_}->[$sortby];
|
|||
|
$msg = $_;
|
|||
|
next;
|
|||
|
}
|
|||
|
$two = $stats{$_}->[$sortby];
|
|||
|
last;
|
|||
|
}
|
|||
|
do_mrtg({one => $one, two => $two, msg => $msg});
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_disk_space
|
|||
|
|
|||
|
|
|||
|
sub check_fsm_pages {
|
|||
|
|
|||
|
## Check on the percentage of free space map pages in use
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Must run as superuser
|
|||
|
## Requires pg_freespacemap contrib module
|
|||
|
## Critical and warning are a percentage of max_fsm_pages
|
|||
|
## Example: --critical=95
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'percent',
|
|||
|
default_warning => '85%',
|
|||
|
default_critical => '95%',
|
|||
|
});
|
|||
|
|
|||
|
(my $w = $warning) =~ s/\D//;
|
|||
|
(my $c = $critical) =~ s/\D//;
|
|||
|
my $SQL = q{
|
|||
|
SELECT pages, maxx, ROUND(100*(pages/maxx)) AS percent
|
|||
|
FROM
|
|||
|
(SELECT (sumrequests+numrels)*chunkpages AS pages
|
|||
|
FROM (SELECT SUM(CASE WHEN avgrequest IS NULL
|
|||
|
THEN interestingpages/32 ELSE interestingpages/16 END) AS sumrequests,
|
|||
|
COUNT(relfilenode) AS numrels, 16 AS chunkpages FROM pg_freespacemap_relations) AS foo) AS foo2,
|
|||
|
(SELECT setting::NUMERIC AS maxx FROM pg_settings WHERE name = 'max_fsm_pages') AS foo3
|
|||
|
};
|
|||
|
my $SQLNOOP = q{SELECT 'FAIL' AS fail};
|
|||
|
|
|||
|
my $info = run_command($SQL, { version => [ ">8.3 $SQLNOOP" ] } );
|
|||
|
|
|||
|
if (exists $info->{db}[0]{slurp}[0]{fail}) {
|
|||
|
add_unknown msg('fsm-page-highver');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
my ($pages,$max,$percent) = ($r->{pages}||0,$r->{maxx},$r->{percent}||0);
|
|||
|
|
|||
|
$MRTG and do_mrtg({one => $percent, two => $pages});
|
|||
|
|
|||
|
my $msg = msg('fsm-page-msg', $pages, $max, $percent);
|
|||
|
|
|||
|
if (length $critical and $percent >= $c) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $percent >= $w) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_fsm_pages
|
|||
|
|
|||
|
|
|||
|
sub check_fsm_relations {
|
|||
|
|
|||
|
## Check on the % of free space map relations in use
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Must run as superuser
|
|||
|
## Requires pg_freespacemap contrib module
|
|||
|
## Critical and warning are a percentage of max_fsm_relations
|
|||
|
## Example: --critical=95
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'percent',
|
|||
|
default_warning => '85%',
|
|||
|
default_critical => '95%',
|
|||
|
});
|
|||
|
|
|||
|
(my $w = $warning) =~ s/\D//;
|
|||
|
(my $c = $critical) =~ s/\D//;
|
|||
|
|
|||
|
my $SQL = q{
|
|||
|
SELECT maxx, cur, ROUND(100*(cur/maxx)) AS percent
|
|||
|
FROM (SELECT
|
|||
|
(SELECT COUNT(*) FROM pg_freespacemap_relations) AS cur,
|
|||
|
(SELECT setting::NUMERIC FROM pg_settings WHERE name='max_fsm_relations') AS maxx) x
|
|||
|
};
|
|||
|
my $SQLNOOP = q{SELECT 'FAIL' AS fail};
|
|||
|
|
|||
|
my $info = run_command($SQL, { version => [ ">8.3 $SQLNOOP" ] } );
|
|||
|
|
|||
|
if (exists $info->{db}[0]{slurp}[0]{fail}) {
|
|||
|
add_unknown msg('fsm-rel-highver');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
my ($max,$cur,$percent) = ($r->{maxx},$r->{cur},$r->{percent}||0);
|
|||
|
|
|||
|
$MRTG and do_mrtg({one => $percent, two => $cur});
|
|||
|
|
|||
|
my $msg = msg('fsm-rel-msg', $cur, $max, $percent);
|
|||
|
|
|||
|
if (length $critical and $percent >= $c) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $percent >= $w) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_fsm_relations
|
|||
|
|
|||
|
|
|||
|
sub check_hitratio {
|
|||
|
|
|||
|
## Check the hitratio of one or more databases
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## mrtg reports the largest two databases
|
|||
|
## By default, checks all databases
|
|||
|
## Can check specific one(s) with include
|
|||
|
## Can ignore some with exclude
|
|||
|
## Warning and criticals are percentages
|
|||
|
## Limit to a specific user (db owner) with the includeuser option
|
|||
|
## Exclude users with the excludeuser option
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'percent'});
|
|||
|
|
|||
|
$SQL = qq{
|
|||
|
SELECT
|
|||
|
round(100.*sd.blks_hit/(sd.blks_read+sd.blks_hit), 2) AS dhitratio,
|
|||
|
d.datname,
|
|||
|
u.usename
|
|||
|
FROM pg_stat_database sd
|
|||
|
JOIN pg_database d ON (d.oid=sd.datid)
|
|||
|
JOIN pg_user u ON (u.usesysid=d.datdba)
|
|||
|
WHERE sd.blks_read+sd.blks_hit<>0
|
|||
|
$USERWHERECLAUSE
|
|||
|
};
|
|||
|
if ($opt{perflimit}) {
|
|||
|
$SQL .= " ORDER BY 1 DESC LIMIT $opt{perflimit}";
|
|||
|
}
|
|||
|
|
|||
|
my $info = run_command($SQL, { regex => qr{\d+}, emptyok => 1, } );
|
|||
|
my $found = 0;
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my $min = 101;
|
|||
|
$found = 1;
|
|||
|
my %s;
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
|
|||
|
next if skip_item($r->{datname});
|
|||
|
|
|||
|
if ($r->{dhitratio} <= $min) {
|
|||
|
$min = $r->{dhitratio};
|
|||
|
}
|
|||
|
$s{$r->{datname}} = $r->{dhitratio};
|
|||
|
}
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $min, msg => "DB: $db->{dbname}"});
|
|||
|
}
|
|||
|
if ($min > 100) {
|
|||
|
$stats{$db->{dbname}} = 0;
|
|||
|
if ($USERWHERECLAUSE) {
|
|||
|
add_ok msg('no-match-user');
|
|||
|
}
|
|||
|
else {
|
|||
|
add_unknown msg('no-match-db');
|
|||
|
}
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
my $msg = '';
|
|||
|
for (reverse sort {$s{$b} <=> $s{$a} or $a cmp $b } keys %s) {
|
|||
|
$msg .= "$_: $s{$_} ";
|
|||
|
$db->{perf} .= sprintf ' %s=%s;%s;%s',
|
|||
|
perfname($_), $s{$_}, $warning, $critical;
|
|||
|
}
|
|||
|
if (length $critical and $min <= $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $min <= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## If no results, probably a version problem
|
|||
|
if (!$found and keys %unknown) {
|
|||
|
(my $first) = values %unknown;
|
|||
|
if ($first->[0][0] =~ /pg_database_size/) {
|
|||
|
ndie msg('dbsize-version');
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_hitratio
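
## A minimal illustrative sketch (never called by this script): the cache hit
## ratio computed by the SQL above, expressed in Perl. The helper name and the
## sample numbers are assumptions; with 9900 buffer hits and 100 disk reads it
## returns 99.00.
sub _example_hit_ratio {
    my ($blks_hit, $blks_read) = @_;              ## e.g. (9900, 100)
    return if ($blks_hit + $blks_read) == 0;      ## the query skips such databases
    return sprintf '%.2f', 100 * $blks_hit / ($blks_hit + $blks_read);
}
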
sub check_hot_standby_delay {
|
|||
|
|
|||
|
## Check on the delay in PITR replication between master and slave
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Critical and warning are the delay between master and slave xlog locations
|
|||
|
## Example: --critical=1024
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'integer', leastone => 1});
|
|||
|
|
|||
|
    # Determine which database is the master and which is the slave, using pg_is_in_recovery()
|
|||
|
my ($master, $slave);
|
|||
|
$SQL = q{SELECT pg_is_in_recovery() AS recovery;};
|
|||
|
|
|||
|
    # Check if master is online (i.e. really a master)
|
|||
|
for my $x (1..2) {
|
|||
|
my $info = run_command($SQL, { dbnumber => $x, regex => qr(t|f) });
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my $status = $db->{slurp}[0];
|
|||
|
if ($status->{recovery} eq 't') {
|
|||
|
$slave = $x;
|
|||
|
last;
|
|||
|
}
|
|||
|
if ($status->{recovery} eq 'f') {
|
|||
|
$master = $x;
|
|||
|
last;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
if (! defined $slave and ! defined $master) {
|
|||
|
add_unknown msg('hs-no-role');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## If the slave is "db1" and master "db2", go ahead and switch them around for clearer output
|
|||
|
if (1 == $slave) {
|
|||
|
($slave, $master) = (2, 1);
|
|||
|
for my $k (qw(host port dbname dbuser dbpass)) {
|
|||
|
($opt{$k}, $opt{$k . 2}) = ($opt{$k . 2}, $opt{$k}); ## no critic (ProhibitMismatchedOperators)
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Get xlog positions
|
|||
|
my ($moffset, $s_rec_offset, $s_rep_offset);
|
|||
|
## On master
|
|||
|
$SQL = q{SELECT pg_current_xlog_location() AS location};
|
|||
|
my $info = run_command($SQL, { dbnumber => $master });
|
|||
|
my $saved_db;
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my $location = $db->{slurp}[0]{location};
|
|||
|
next if ! defined $location;
|
|||
|
|
|||
|
my ($x, $y) = split(/\//, $location);
|
|||
|
$moffset = (hex('ff000000') * hex($x)) + hex($y);
|
|||
|
$saved_db = $db if ! defined $saved_db;
|
|||
|
}
|
|||
|
|
|||
|
if (! defined $moffset) {
|
|||
|
add_unknown msg('hs-no-location', 'master');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## On slave
|
|||
|
$SQL = q{SELECT pg_last_xlog_receive_location() AS receive, pg_last_xlog_replay_location() AS replay};
|
|||
|
|
|||
|
$info = run_command($SQL, { dbnumber => $slave, regex => qr/\// });
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my $receive = $db->{slurp}[0]{receive};
|
|||
|
my $replay = $db->{slurp}[0]{replay};
|
|||
|
|
|||
|
if (defined $receive) {
|
|||
|
my ($a, $b) = split(/\//, $receive);
|
|||
|
$s_rec_offset = (hex('ff000000') * hex($a)) + hex($b);
|
|||
|
}
|
|||
|
|
|||
|
if (defined $replay) {
|
|||
|
my ($a, $b) = split(/\//, $replay);
|
|||
|
$s_rep_offset = (hex('ff000000') * hex($a)) + hex($b);
|
|||
|
}
|
|||
|
|
|||
|
$saved_db = $db if ! defined $saved_db;
|
|||
|
}
|
|||
|
|
|||
|
if (! defined $s_rec_offset and ! defined $s_rep_offset) {
|
|||
|
add_unknown msg('hs-no-location', 'slave');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## Compute deltas
|
|||
|
$db = $saved_db;
|
|||
|
my $rec_delta = $moffset - $s_rec_offset;
|
|||
|
my $rep_delta = $moffset - $s_rep_offset;
|
|||
|
|
|||
|
$MRTG and do_mrtg({one => $rep_delta, two => $rec_delta});
|
|||
|
|
|||
|
$db->{perf} = sprintf ' %s=%s;%s;%s ',
|
|||
|
perfname(msg('hs-replay-delay')), $rep_delta, $warning, $critical;
|
|||
|
$db->{perf} .= sprintf ' %s=%s;%s;%s',
|
|||
|
perfname(msg('hs-receive-delay')), $rec_delta, $warning, $critical;
|
|||
|
|
|||
|
    ## Do the check on replay delay in case SR has disconnected because it is way too far behind
|
|||
|
my $msg = qq{$rep_delta};
|
|||
|
if (length $critical and $rep_delta > $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $rep_delta > $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_hot_standby_delay
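
## A minimal illustrative sketch (never called by this script): how an xlog
## location string such as '2/AB12F08' is turned into the byte offset that
## check_hot_standby_delay compares above. The helper name and the sample
## value are assumptions; the arithmetic mirrors the code above.
sub _example_xlog_to_offset {
    my $location = shift || '2/AB12F08';
    my ($high, $low) = split /\//, $location;
    ## Each unit of the "high" half covers 0xff000000 bytes in this scheme
    return (hex('ff000000') * hex($high)) + hex($low);
}
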
sub check_last_analyze {
    my $auto = shift || '';
    return check_last_vacuum_analyze('analyze', $auto);
}


sub check_last_vacuum {
    my $auto = shift || '';
    return check_last_vacuum_analyze('vacuum', $auto);
}


sub check_last_vacuum_analyze {
|
|||
|
|
|||
|
my $type = shift || 'vacuum';
|
|||
|
my $auto = shift || 0;
|
|||
|
|
|||
|
## Check the last time things were vacuumed or analyzed
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## NOTE: stats_row_level must be set to on in your database (if version 8.2)
|
|||
|
## By default, reports on the oldest value in the database
|
|||
|
## Can exclude and include tables
|
|||
|
## Warning and critical are times, default to seconds
|
|||
|
## Valid units: s[econd], m[inute], h[our], d[ay]
|
|||
|
## All above may be written as plural as well (e.g. "2 hours")
|
|||
|
## Limit to a specific user (relation owner) with the includeuser option
|
|||
|
## Exclude users with the excludeuser option
|
|||
|
## Example:
|
|||
|
## --exclude=~pg_ --include=pg_class,pg_attribute
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'time',
|
|||
|
default_warning => '1 day',
|
|||
|
default_critical => '2 days',
|
|||
|
});
|
|||
|
|
|||
|
my $criteria = $auto ?
|
|||
|
qq{pg_stat_get_last_auto${type}_time(c.oid)}
|
|||
|
: qq{GREATEST(pg_stat_get_last_${type}_time(c.oid), pg_stat_get_last_auto${type}_time(c.oid))};
|
|||
|
|
|||
|
## Do include/exclude earlier for large pg_classes?
|
|||
|
$SQL = qq{
|
|||
|
SELECT current_database() AS datname, nspname AS sname, relname AS tname,
|
|||
|
CASE WHEN v IS NULL THEN -1 ELSE round(extract(epoch FROM now()-v)) END AS ltime,
|
|||
|
CASE WHEN v IS NULL THEN '?' ELSE TO_CHAR(v, '$SHOWTIME') END AS ptime
|
|||
|
FROM (SELECT nspname, relname, $criteria AS v
|
|||
|
FROM pg_class c, pg_namespace n
|
|||
|
WHERE relkind = 'r'
|
|||
|
AND n.oid = c.relnamespace
|
|||
|
AND n.nspname <> 'information_schema'
|
|||
|
ORDER BY 3) AS foo
|
|||
|
};
|
|||
|
if ($opt{perflimit}) {
|
|||
|
$SQL .= ' ORDER BY 4 DESC';
|
|||
|
}
|
|||
|
|
|||
|
if ($USERWHERECLAUSE) {
|
|||
|
$SQL =~ s/ WHERE/, pg_user u WHERE u.usesysid=c.relowner$USERWHERECLAUSE AND/;
|
|||
|
}
|
|||
|
|
|||
|
my $info = run_command($SQL, { regex => qr{\w}, emptyok => 1 } );
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
|
|||
|
if (! @{$db->{slurp}} and $USERWHERECLAUSE) {
|
|||
|
$stats{$db->{dbname}} = 0;
|
|||
|
add_ok msg('no-match-user');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## -1 means no tables found at all
|
|||
|
## -2 means exclusion rules took effect
|
|||
|
## -3 means no tables were ever vacuumed/analyzed
|
|||
|
my $maxtime = -1;
|
|||
|
my $maxptime = '?';
|
|||
|
my ($minrel,$maxrel) = ('?','?'); ## no critic
|
|||
|
my $mintime = 0; ## used for MRTG only
|
|||
|
my $count = 0;
|
|||
|
my $found = 0;
|
|||
|
ROW: for my $r (@{$db->{slurp}}) {
|
|||
|
my ($dbname,$schema,$name,$time,$ptime) = @$r{qw/ datname sname tname ltime ptime/};
|
|||
|
if (skip_item($name, $schema)) {
|
|||
|
$maxtime = -2 if $maxtime < 1;
|
|||
|
next ROW;
|
|||
|
}
|
|||
|
$found++;
|
|||
|
if ($time >= 0) {
|
|||
|
$db->{perf} .= sprintf ' %s=%ss;%s;%s',
|
|||
|
perfname("$dbname.$schema.$name"),$time, $warning, $critical;
|
|||
|
}
|
|||
|
if ($time > $maxtime) {
|
|||
|
$maxtime = $time;
|
|||
|
$maxrel = "DB: $dbname TABLE: $schema.$name";
|
|||
|
$maxptime = $ptime;
|
|||
|
}
|
|||
|
if ($time > 0 and ($time < $mintime or !$mintime)) {
|
|||
|
$mintime = $time;
|
|||
|
$minrel = "DB: $dbname TABLE: $schema.$name";
|
|||
|
}
|
|||
|
if ($opt{perflimit}) {
|
|||
|
last if ++$count >= $opt{perflimit};
|
|||
|
}
|
|||
|
}
|
|||
|
if ($MRTG) {
|
|||
|
$maxrel eq '?' and $maxrel = "DB: $db->{dbname} TABLE: ?";
|
|||
|
do_mrtg({one => $mintime, msg => $maxrel});
|
|||
|
return;
|
|||
|
}
|
|||
|
if ($maxtime == -2) {
|
|||
|
add_unknown (
|
|||
|
$found ? $type eq 'vacuum' ? msg('vac-nomatch-v')
|
|||
|
: msg('vac-nomatch-a')
|
|||
|
: msg('no-match-table') ## no critic (RequireTrailingCommaAtNewline)
|
|||
|
);
|
|||
|
}
|
|||
|
elsif ($maxtime < 0) {
|
|||
|
add_unknown $type eq 'vacuum' ? msg('vac-nomatch-v') : msg('vac-nomatch-a');
|
|||
|
}
|
|||
|
else {
|
|||
|
my $showtime = pretty_time($maxtime, 'S');
|
|||
|
my $msg = "$maxrel: $maxptime ($showtime)";
|
|||
|
if ($critical and $maxtime >= $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif ($warning and $maxtime >= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_last_vacuum_analyze
|
|||
|
|
|||
|
|
|||
|
sub check_listener {
|
|||
|
|
|||
|
## Check for a specific listener
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Critical and warning are simple strings, or regex if starts with a ~
|
|||
|
## Example: --critical="~bucardo"
|
|||
|
|
|||
|
if ($MRTG and exists $opt{mrtg}) {
|
|||
|
$opt{critical} = $opt{mrtg};
|
|||
|
}
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'restringex', forcemrtg => 1});
|
|||
|
|
|||
|
my $string = length $critical ? $critical : $warning;
|
|||
|
my $regex = ($string =~ s/^~//) ? '~' : '=';
|
|||
|
|
|||
|
$SQL = "SELECT count(*) AS c FROM pg_listener WHERE relname $regex '$string'";
|
|||
|
my $info = run_command($SQL);
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
if ($db->{slurp}[0]{c} !~ /(\d+)/) {
|
|||
|
add_unknown msg('invalid-query', $db->{slurp});
|
|||
|
next;
|
|||
|
}
|
|||
|
my $count = $1;
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $count});
|
|||
|
}
|
|||
|
$db->{perf} .= sprintf '%s=%s',
|
|||
|
perfname(msg('listening')), $count;
|
|||
|
my $msg = msg('listener-msg', $count);
|
|||
|
if ($count >= 1) {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
elsif ($critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_listener
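
## A minimal illustrative sketch (never called by this script): how the
## warning/critical string for check_listener selects regex matching versus a
## literal comparison. The helper name is an assumption; the leading-tilde
## convention mirrors the code above.
sub _example_listener_operator {
    my $string = shift || '~bucardo';
    my $operator = ($string =~ s/^~//) ? '~' : '=';
    return ($operator, $string);    ## ('~', 'bucardo') means "match the regex"
}
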
sub check_locks {
|
|||
|
|
|||
|
## Check the number of locks
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## By default, checks all databases
|
|||
|
## Can check specific databases with include
|
|||
|
## Can ignore databases with exclude
|
|||
|
## Warning and critical are either simple numbers, or more complex:
|
|||
|
## Use locktype=number;locktype2=number
|
|||
|
## The locktype can be "total", "waiting", or the name of a lock
|
|||
|
## Lock names are case-insensitive, and do not need the "lock" at the end.
|
|||
|
## Example: --warning=100 --critical="total=200;exclusive=20;waiting=5"
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'multival',
|
|||
|
default_warning => 100,
|
|||
|
default_critical => 150,
|
|||
|
});
|
|||
|
|
|||
|
$SQL = q{SELECT granted, mode, datname FROM pg_locks l JOIN pg_database d ON (d.oid=l.database)};
|
|||
|
my $info = run_command($SQL, { regex => qr[\s*\w+\s*\|\s*] });
|
|||
|
|
|||
|
# Locks are counted globally not by db.
|
|||
|
# add a limit by db ? (--critical='foodb.total=30 foodb.exclusive=3 postgres.total=3')
|
|||
|
    # and remove the -db option?
    # For each database, we output perfdata using its specific warning and critical values, e.g.:
|
|||
|
# time=00.1 foodb.exclusive=2;;3 foodb.total=10;;30 postgres.exclusive=0;;3 postgres.total=1;;3
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my $gotone = 0;
|
|||
|
my %dblock;
|
|||
|
my %totallock = (total => 0);
|
|||
|
ROW: for my $r (@{$db->{slurp}}) {
|
|||
|
my ($granted,$mode,$dbname) = ($r->{granted}, lc $r->{mode}, $r->{datname});
|
|||
|
next ROW if skip_item($dbname);
|
|||
|
$gotone = 1;
|
|||
|
$mode =~ s{lock$}{};
|
|||
|
$dblock{$dbname}{total}++;
|
|||
|
$dblock{$dbname}{$mode}++;
|
|||
|
$dblock{$dbname}{waiting}++ if $granted ne 't';
|
|||
|
}
|
|||
|
# Compute total, add hash key for critical and warning specific check
|
|||
|
for my $k (keys %dblock) {
|
|||
|
if ($warning) {
|
|||
|
for my $l (keys %{$warning}) {
|
|||
|
$dblock{$k}{$l} = 0 if ! exists $dblock{$k}{$l};
|
|||
|
}
|
|||
|
}
|
|||
|
if ($critical) {
|
|||
|
for my $l (keys %{$critical}) {
|
|||
|
$dblock{$k}{$l} = 0 if ! exists $dblock{$k}{$l};
|
|||
|
}
|
|||
|
}
|
|||
|
for my $m (keys %{$dblock{$k}}){
|
|||
|
$totallock{$m} += $dblock{$k}{$m};
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg( {one => $totallock{total}, msg => "DB: $db->{dbname}" } );
|
|||
|
}
|
|||
|
|
|||
|
# Nagios perfdata output
|
|||
|
for my $dbname (sort keys %dblock) {
|
|||
|
for my $type (sort keys %{ $dblock{$dbname} }) {
|
|||
|
next if ((! $critical or ! exists $critical->{$type})
|
|||
|
and (!$warning or ! exists $warning->{$type}));
|
|||
|
$db->{perf} .= sprintf ' %s=%s;',
|
|||
|
perfname("$dbname.$type"), $dblock{$dbname}{$type};
|
|||
|
if ($warning and exists $warning->{$type}) {
|
|||
|
$db->{perf} .= $warning->{$type};
|
|||
|
}
|
|||
|
if ($critical and $critical->{$type}) {
|
|||
|
$db->{perf} .= ";$critical->{$type}";
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (!$gotone) {
|
|||
|
add_unknown msg('no-match-db');
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
## If not specific errors, just use the total
|
|||
|
my $ok = 1;
|
|||
|
for my $type (keys %totallock) {
|
|||
|
if ($critical and exists $critical->{$type} and $totallock{$type} >= $critical->{$type}) {
|
|||
|
($type eq 'total')
|
|||
|
? add_critical msg('locks-msg2', $totallock{total})
|
|||
|
: add_critical msg('locks-msg', $type, $totallock{$type});
|
|||
|
$ok = 0;
|
|||
|
}
|
|||
|
if ($warning and exists $warning->{$type} and $totallock{$type} >= $warning->{$type}) {
|
|||
|
($type eq 'total')
|
|||
|
? add_warning msg('locks-msg2', $totallock{total})
|
|||
|
: add_warning msg('locks-msg', $type, $totallock{$type});
|
|||
|
$ok = 0;
|
|||
|
}
|
|||
|
}
|
|||
|
if ($ok) {
|
|||
|
my %show;
|
|||
|
if (!keys %critical and !keys %warning) {
|
|||
|
$show{total} = 1;
|
|||
|
}
|
|||
|
for my $type (keys %critical) {
|
|||
|
$show{$type} = 1;
|
|||
|
}
|
|||
|
for my $type (keys %warning) {
|
|||
|
$show{$type} = 1;
|
|||
|
}
|
|||
|
my $msg = '';
|
|||
|
for (sort keys %show) {
|
|||
|
$msg .= sprintf "$_=%d ", $totallock{$_} || 0;
|
|||
|
}
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_locks
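
## A minimal illustrative sketch (never called by this script): one way to view
## a "multival" threshold such as --critical="total=200;exclusive=20;waiting=5"
## as a hash of lock type => limit. The real parsing is done by validate_range();
## this simplified helper is an assumption for documentation only.
sub _example_parse_multival {
    my $spec = shift || 'total=200;exclusive=20;waiting=5';
    my %limit;
    for my $pair (split /\s*;\s*/ => $spec) {
        my ($type, $number) = split /\s*=\s*/ => $pair, 2;
        $limit{lc $type} = $number;
    }
    return \%limit;    ## e.g. { total => 200, exclusive => 20, waiting => 5 }
}
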
sub check_logfile {
|
|||
|
|
|||
|
## Make sure the logfile is getting written to
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Especially useful for syslog redirectors
|
|||
|
## Should be run on the system housing the logs
|
|||
|
## Optional argument "logfile" tells where the logfile is
|
|||
|
## Allows for some conversion characters.
|
|||
|
    ## Example: --logfile="/syslog/%Y-%m-%d/%H/postgres.log"
|
|||
|
## Critical and warning are not used: it's either ok or critical.
|
|||
|
|
|||
|
my $critwarn = $opt{warning} ? 0 : 1;
|
|||
|
|
|||
|
$SQL = q{
|
|||
|
SELECT name, CASE WHEN length(setting)<1 THEN '?' ELSE setting END AS s
|
|||
|
FROM pg_settings
|
|||
|
WHERE name IN ('log_destination','log_directory','log_filename','redirect_stderr','syslog_facility')
|
|||
|
ORDER BY name
|
|||
|
};
|
|||
|
|
|||
|
my $logfilere = qr{^[\w_\s\/%\-\.]+$};
|
|||
|
if (exists $opt{logfile} and $opt{logfile} !~ $logfilere) {
|
|||
|
ndie msg('logfile-opt-bad');
|
|||
|
}
|
|||
|
|
|||
|
my $info = run_command($SQL);
|
|||
|
$VERBOSE >= 3 and warn Dumper $info;
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my $i;
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
$i->{$r->{name}} = $r->{s} || '?';
|
|||
|
}
|
|||
|
for my $word (qw{ log_destination log_directory log_filename redirect_stderr syslog_facility }) {
|
|||
|
$i->{$word} = '?' if ! exists $i->{$word};
|
|||
|
}
|
|||
|
|
|||
|
## Figure out what we think the log file will be
|
|||
|
my $logfile ='';
|
|||
|
if (exists $opt{logfile} and $opt{logfile} =~ /\w/) {
|
|||
|
$logfile = $opt{logfile};
|
|||
|
}
|
|||
|
else {
|
|||
|
if ($i->{log_destination} eq 'syslog') {
|
|||
|
## We'll make a best effort to figure out where it is. Using the --logfile option is preferred.
|
|||
|
$logfile = '/var/log/messages';
|
|||
|
if (open my $cfh, '<', '/etc/syslog.conf') {
|
|||
|
while (<$cfh>) {
|
|||
|
if (/\b$i->{syslog_facility}\.(?!none).+?([\w\/]+)$/i) {
|
|||
|
$logfile = $1;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
if (!$logfile or ! -e $logfile) {
|
|||
|
ndie msg('logfile-syslog', $i->{syslog_facility});
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ($i->{log_destination} eq 'stderr') {
|
|||
|
if ($i->{redirect_stderr} ne 'yes') {
|
|||
|
ndie msg('logfile-stderr');
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## We now have a logfile (or a template)..parse it into pieces.
|
|||
|
## We need at least hour, day, month, year
|
|||
|
my @t = localtime;
|
|||
|
my ($H,$d,$m,$Y) = (sprintf ('%02d',$t[2]),sprintf('%02d',$t[3]),sprintf('%02d',$t[4]+1),$t[5]+1900);
|
|||
|
my $y = substr($Y,2,4);
|
|||
|
if ($logfile !~ $logfilere) {
|
|||
|
ndie msg('logfile-bad',$logfile);
|
|||
|
}
|
|||
|
$logfile =~ s/%%/~~/g;
|
|||
|
$logfile =~ s/%Y/$Y/g;
|
|||
|
$logfile =~ s/%y/$y/g;
|
|||
|
$logfile =~ s/%m/$m/g;
|
|||
|
$logfile =~ s/%d/$d/g;
|
|||
|
$logfile =~ s/%H/$H/g;
|
|||
|
|
|||
|
$VERBOSE >= 3 and warn msg('logfile-debug', $logfile);
|
|||
|
|
|||
|
if (! -e $logfile) {
|
|||
|
my $msg = msg('logfile-dne', $logfile);
|
|||
|
$MRTG and ndie $msg;
|
|||
|
if ($critwarn) {
|
|||
|
add_unknown $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
next;
|
|||
|
}
|
|||
|
my $logfh;
|
|||
|
unless (open $logfh, '<', $logfile) {
|
|||
|
add_unknown msg('logfile-openfail', $logfile, $!);
|
|||
|
next;
|
|||
|
}
|
|||
|
seek($logfh, 0, 2) or ndie msg('logfile-seekfail', $logfile, $!);
|
|||
|
|
|||
|
## Throw a custom error string.
|
|||
|
## We do the number first as old versions only show part of the string.
|
|||
|
my $random_number = int rand(999999999999);
|
|||
|
my $funky = sprintf "check_postgres_logfile_error_$random_number $ME DB=$db->{dbname} PID=$$ Time=%s",
|
|||
|
scalar localtime;
|
|||
|
|
|||
|
## Cause an error on just this target
|
|||
|
delete @{$db}{qw(ok slurp totaltime)};
|
|||
|
my $badinfo = run_command("$funky", {failok => 1, target => $db} );
|
|||
|
|
|||
|
my $MAXSLEEPTIME = $opt{timeout} || 20;
|
|||
|
my $SLEEP = 1;
|
|||
|
my $found = 0;
|
|||
|
LOGWAIT: {
|
|||
|
sleep $SLEEP;
|
|||
|
seek $logfh, 0, 1 or ndie msg('logfile-seekfail', $logfile, $!);
|
|||
|
while (<$logfh>) {
|
|||
|
if (/logfile_error_$random_number/) { ## Some logs break things up, so we don't use funky
|
|||
|
$found = 1;
|
|||
|
last LOGWAIT;
|
|||
|
}
|
|||
|
}
|
|||
|
$MAXSLEEPTIME -= $SLEEP;
|
|||
|
redo if $MAXSLEEPTIME > 0;
|
|||
|
my $msg = msg('logfile-fail', $logfile);
|
|||
|
$MRTG and do_mrtg({one => 0, msg => $msg});
|
|||
|
if ($critwarn) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
close $logfh or ndie msg('file-noclose', $logfile, $!);
|
|||
|
|
|||
|
if ($found == 1) {
|
|||
|
$MRTG and do_mrtg({one => 1});
|
|||
|
add_ok msg('logfile-ok', $logfile);
|
|||
|
}
|
|||
|
}
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_logfile
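
## A minimal illustrative sketch (never called by this script): how the
## %Y/%y/%m/%d/%H conversion characters in a --logfile template are expanded.
## The helper name and sample template are assumptions; the substitutions
## mirror the ones performed in check_logfile above.
sub _example_expand_logfile_template {
    my $template = shift || '/syslog/%Y-%m-%d/%H/postgres.log';
    my @t = localtime;
    my ($H,$d,$m,$Y) = (sprintf('%02d',$t[2]), sprintf('%02d',$t[3]),
                        sprintf('%02d',$t[4]+1), $t[5]+1900);
    my $y = substr($Y,2,2);
    $template =~ s/%Y/$Y/g;
    $template =~ s/%y/$y/g;
    $template =~ s/%m/$m/g;
    $template =~ s/%d/$d/g;
    $template =~ s/%H/$H/g;
    return $template;    ## e.g. '/syslog/2012-03-14/09/postgres.log'
}
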
sub find_new_version {
|
|||
|
|
|||
|
## Check for newer versions of some program
|
|||
|
|
|||
|
my $program = shift or die;
|
|||
|
my $exec = shift or die;
|
|||
|
my $url = shift or die;
|
|||
|
|
|||
|
## The format is X.Y.Z [optional message]
|
|||
|
my $versionre = qr{((\d+)\.(\d+)\.(\d+))\s*(.*)};
|
|||
|
my ($cversion,$cmajor,$cminor,$crevision,$cmessage) = ('','','','','');
|
|||
|
my $found = 0;
|
|||
|
|
|||
|
## Try to fetch the current version from the web
|
|||
|
for my $meth (@get_methods) {
|
|||
|
eval {
|
|||
|
my $COM = "$meth $url";
|
|||
|
$VERBOSE >= 1 and warn "TRYING: $COM\n";
|
|||
|
my $info = qx{$COM 2>/dev/null};
|
|||
|
## Postgres is slightly different
|
|||
|
if ($program eq 'Postgres') {
|
|||
|
$cmajor = {};
|
|||
|
while ($info =~ /<title>(\d+)\.(\d+)\.(\d+)/g) {
|
|||
|
$found = 1;
|
|||
|
$cmajor->{"$1.$2"} = $3;
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ($info =~ $versionre) {
|
|||
|
$found = 1;
|
|||
|
($cversion,$cmajor,$cminor,$crevision,$cmessage) = ($1, int $2, int $3, int $4, $5);
|
|||
|
if ($VERBOSE >= 1) {
|
|||
|
$info =~ s/\s+$//s;
|
|||
|
warn "Remote version string: $info\n";
|
|||
|
warn "Remote version: $cversion\n";
|
|||
|
}
|
|||
|
}
|
|||
|
};
|
|||
|
last if $found;
|
|||
|
}
|
|||
|
|
|||
|
if (! $found) {
|
|||
|
add_unknown msg('new-ver-nocver', $program);
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## Figure out the local copy's version
|
|||
|
my $output;
|
|||
|
eval {
|
|||
|
## We may already know the version (e.g. ourselves)
|
|||
|
$output = ($exec =~ /\d+\.\d+/) ? $exec : qx{$exec --version 2>&1};
|
|||
|
};
|
|||
|
if ($@ or !$output) {
|
|||
|
if ($program eq 'tail_n_mail') {
|
|||
|
## Check for the old name
|
|||
|
eval {
|
|||
|
$output = qx{tail_n_mail.pl --version 2>&1};
|
|||
|
};
|
|||
|
}
|
|||
|
if ($@ or !$output) {
|
|||
|
add_unknown msg('new-ver-badver', $program);
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if ($output !~ $versionre) {
|
|||
|
add_unknown msg('new-ver-nolver', $program);
|
|||
|
return;
|
|||
|
}
|
|||
|
my ($lversion,$lmajor,$lminor,$lrevision) = ($1, int $2, int $3, int $4);
|
|||
|
if ($VERBOSE >= 1) {
|
|||
|
$output =~ s/\s+$//s;
|
|||
|
warn "Local version string: $output\n";
|
|||
|
warn "Local version: $lversion\n";
|
|||
|
}
|
|||
|
|
|||
|
## Postgres is a special case
|
|||
|
if ($program eq 'Postgres') {
|
|||
|
my $lver = "$lmajor.$lminor";
|
|||
|
if (! exists $cmajor->{$lver}) {
|
|||
|
add_unknown msg('new-ver-nocver', $program);
|
|||
|
return;
|
|||
|
}
|
|||
|
$crevision = $cmajor->{$lver};
|
|||
|
$cmajor = $lmajor;
|
|||
|
$cminor = $lminor;
|
|||
|
$cversion = "$cmajor.$cminor.$crevision";
|
|||
|
}
|
|||
|
|
|||
|
## Most common case: everything matches
|
|||
|
if ($lversion eq $cversion) {
|
|||
|
add_ok msg('new-ver-ok', $lversion, $program);
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## Check for a revision update
|
|||
|
if ($lmajor==$cmajor and $lminor==$cminor and $lrevision<$crevision) {
|
|||
|
add_critical msg('new-ver-warn', $cversion, $program, $lversion);
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## Check for a major update
|
|||
|
if ($lmajor<$cmajor or ($lmajor==$cmajor and $lminor<$cminor)) {
|
|||
|
add_warning msg('new-ver-warn', $cversion, $program, $lversion);
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## Anything else must be time travel, which we cannot handle
|
|||
|
add_unknown msg('new-ver-tt', $program, $lversion, $cversion);
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of find_new_version
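
## A minimal illustrative sketch (never called by this script): the decision
## rule find_new_version applies when comparing a local X.Y.Z version against
## the latest published one. The helper name is an assumption; the ordering of
## the tests mirrors the code above.
sub _example_version_status {
    my ($lmajor,$lminor,$lrevision, $cmajor,$cminor,$crevision) = @_;
    return 'ok'       if "$lmajor.$lminor.$lrevision" eq "$cmajor.$cminor.$crevision";
    return 'critical' if $lmajor==$cmajor and $lminor==$cminor and $lrevision<$crevision;
    return 'warning'  if $lmajor<$cmajor or ($lmajor==$cmajor and $lminor<$cminor);
    return 'unknown';  ## local is newer than published: treated as "time travel"
}
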
sub check_new_version_bc {

    ## Check if a newer version of Bucardo is available

    my $url = 'http://bucardo.org/bucardo/latest_version.txt';
    find_new_version('Bucardo', 'bucardo_ctl', $url);

    return;

} ## end of check_new_version_bc


sub check_new_version_box {

    ## Check if a newer version of boxinfo is available

    my $url = 'http://bucardo.org/boxinfo/latest_version.txt';
    find_new_version('boxinfo', 'boxinfo.pl', $url);

    return;

} ## end of check_new_version_box


sub check_new_version_cp {

    ## Check if a new version of check_postgres.pl is available

    my $url = 'http://bucardo.org/check_postgres/latest_version.txt';
    find_new_version('check_postgres', $VERSION, $url);

    return;

} ## end of check_new_version_cp


sub check_new_version_pg {
|
|||
|
|
|||
|
## Check if a new version of Postgres is available
|
|||
|
|
|||
|
my $url = 'http://www.postgresql.org/versions.rss';
|
|||
|
|
|||
|
## Grab the local version
|
|||
|
my $info = run_command('SELECT version() AS version');
|
|||
|
my $lversion = $info->{db}[0]{slurp}[0]{version};
|
|||
|
## Make sure it is parseable and check for development versions
|
|||
|
if ($lversion !~ /\d+\.\d+\.\d+/) {
|
|||
|
if ($lversion =~ /(\d+\.\d+\S+)/) {
|
|||
|
add_ok msg('new-ver-dev', 'Postgres', $1);
|
|||
|
return;
|
|||
|
}
|
|||
|
add_unknown msg('new-ver-nolver', 'Postgres');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
find_new_version('Postgres', $lversion, $url);
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_new_version_pg
|
|||
|
|
|||
|
|
|||
|
sub check_new_version_tnm {

    ## Check if a new version of tail_n_mail is available

    my $url = 'http://bucardo.org/tail_n_mail/latest_version.txt';
    find_new_version('tail_n_mail', 'tail_n_mail', $url);

    return;

} ## end of check_new_version_tnm


sub check_pgbouncer_checksum {
|
|||
|
|
|||
|
## Verify the checksum of all pgbouncer settings
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
    ## Note that the connection will be made to the pgbouncer database
|
|||
|
## One of warning or critical must be given (but not both)
|
|||
|
## It should run one time to find out the expected checksum
|
|||
|
## You can use --critical="0" to find out the checksum
|
|||
|
## You can include or exclude settings as well
|
|||
|
## Example:
|
|||
|
## check_postgres_pgbouncer_checksum --critical="4e7ba68eb88915d3d1a36b2009da4acd"
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'checksum', onlyone => 1});
|
|||
|
|
|||
|
eval {
|
|||
|
require Digest::MD5;
|
|||
|
};
|
|||
|
if ($@) {
|
|||
|
ndie msg('checksum-nomd');
|
|||
|
}
|
|||
|
|
|||
|
$SQL = 'SHOW CONFIG';
|
|||
|
my $info = run_command($SQL, { regex => qr[log_pooler_errors] });
|
|||
|
|
|||
|
$db = $info->{db}[0];
|
|||
|
|
|||
|
my $newstring = '';
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
my $key = $r->{key};
|
|||
|
next if skip_item($key);
|
|||
|
$newstring .= "$r->{key} = $r->{value}\n";
|
|||
|
}
|
|||
|
|
|||
|
if (! length $newstring) {
|
|||
|
add_unknown msg('no-match-set');
|
|||
|
}
|
|||
|
|
|||
|
my $checksum = Digest::MD5::md5_hex($newstring);
|
|||
|
|
|||
|
my $msg = msg('checksum-msg', $checksum);
|
|||
|
if ($MRTG) {
|
|||
|
$opt{mrtg} or ndie msg('checksum-nomrtg');
|
|||
|
do_mrtg({one => $opt{mrtg} eq $checksum ? 1 : 0, msg => $checksum});
|
|||
|
}
|
|||
|
if ($critical and $critical ne $checksum) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif ($warning and $warning ne $checksum) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
elsif (!$critical and !$warning) {
|
|||
|
add_unknown $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_pgbouncer_checksum
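
## A minimal illustrative sketch (never called by this script): the checksum
## compared above is simply an MD5 hex digest over "name = value" lines built
## from SHOW CONFIG. The sample settings and helper name are assumptions; the
## digest call mirrors check_pgbouncer_checksum.
sub _example_settings_checksum {
    require Digest::MD5;
    my %settings = (default_pool_size => 20, max_client_conn => 100);
    my $string = '';
    for my $name (sort keys %settings) {
        $string .= "$name = $settings{$name}\n";
    }
    return Digest::MD5::md5_hex($string);
}
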
sub check_pgbouncer_backends {
|
|||
|
|
|||
|
## Check the number of connections to pgbouncer compared to
|
|||
|
## max_client_conn
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## It makes no sense to run this more than once on the same cluster
|
|||
|
## Need to be superuser, else only your queries will be visible
|
|||
|
## Warning and criticals can take three forms:
|
|||
|
## critical = 12 -- complain if there are 12 or more connections
|
|||
|
## critical = 95% -- complain if >= 95% of available connections are used
|
|||
|
## critical = -5 -- complain if there are only 5 or fewer connection slots left
|
|||
|
## The former two options only work with simple numbers - no percentage or negative
|
|||
|
## Can also ignore databases with exclude, and limit with include
|
|||
|
|
|||
|
my $warning = $opt{warning} || '90%';
|
|||
|
my $critical = $opt{critical} || '95%';
|
|||
|
my $noidle = $opt{noidle} || 0;
|
|||
|
|
|||
|
## If only critical was used, remove the default warning
|
|||
|
if ($opt{critical} and !$opt{warning}) {
|
|||
|
$warning = $critical;
|
|||
|
}
|
|||
|
|
|||
|
my $validre = qr{^(\-?)(\d+)(\%?)$};
|
|||
|
if ($critical !~ $validre) {
|
|||
|
ndie msg('pgb-backends-users', 'Critical');
|
|||
|
}
|
|||
|
my ($e1,$e2,$e3) = ($1,$2,$3);
|
|||
|
if ($warning !~ $validre) {
|
|||
|
ndie msg('pgb-backends-users', 'Warning');
|
|||
|
}
|
|||
|
my ($w1,$w2,$w3) = ($1,$2,$3);
|
|||
|
|
|||
|
## If number is greater, all else is same, and not minus
|
|||
|
if ($w2 > $e2 and $w1 eq $e1 and $w3 eq $e3 and $w1 eq '') {
|
|||
|
ndie msg('range-warnbig');
|
|||
|
}
|
|||
|
## If number is less, all else is same, and minus
|
|||
|
if ($w2 < $e2 and $w1 eq $e1 and $w3 eq $e3 and $w1 eq '-') {
|
|||
|
ndie msg('range-warnsmall');
|
|||
|
}
|
|||
|
if (($w1 and $w3) or ($e1 and $e3)) {
|
|||
|
ndie msg('range-neg-percent');
|
|||
|
}
|
|||
|
|
|||
|
## Grab information from the config
|
|||
|
$SQL = 'SHOW CONFIG';
|
|||
|
|
|||
|
my $info = run_command($SQL, { regex => qr{\d+}, emptyok => 1 } );
|
|||
|
|
|||
|
## Default values for information gathered
|
|||
|
my $limit = 0;
|
|||
|
|
|||
|
## Determine max_client_conn
|
|||
|
for my $r (@{$info->{db}[0]{slurp}}) {
|
|||
|
if ($r->{key} eq 'max_client_conn') {
|
|||
|
$limit = $r->{value};
|
|||
|
last;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Grab information from pools
|
|||
|
$SQL = 'SHOW POOLS';
|
|||
|
|
|||
|
$info = run_command($SQL, { regex => qr{\d+}, emptyok => 1 } );
|
|||
|
|
|||
|
$db = $info->{db}[0];
|
|||
|
|
|||
|
my $total = 0;
|
|||
|
my $grandtotal = @{$db->{slurp}};
|
|||
|
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
|
|||
|
## Always want perf to show all
|
|||
|
my $nwarn=$w2;
|
|||
|
my $ncrit=$e2;
|
|||
|
if ($e1) {
|
|||
|
$ncrit = $limit-$e2;
|
|||
|
}
|
|||
|
elsif ($e3) {
|
|||
|
$ncrit = (int $e2*$limit/100);
|
|||
|
}
|
|||
|
if ($w1) {
|
|||
|
$nwarn = $limit-$w2;
|
|||
|
}
|
|||
|
elsif ($w3) {
|
|||
|
$nwarn = (int $w2*$limit/100)
|
|||
|
}
|
|||
|
|
|||
|
if (! skip_item($r->{database})) {
|
|||
|
my $current = $r->{cl_active} + $r->{cl_waiting};
|
|||
|
$db->{perf} .= " '$r->{database}'=$current;$nwarn;$ncrit;0;$limit";
|
|||
|
$total += $current;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
$stats{$db->{dbname}} = $total;
|
|||
|
$statsmsg{$db->{dbname}} = msg('pgb-backends-mrtg', $db->{dbname}, $limit);
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
if (!$total) {
|
|||
|
if ($grandtotal) {
|
|||
|
## We assume that exclude/include rules are correct, and we simply had no entries
|
|||
|
## at all in the specific databases we wanted
|
|||
|
add_ok msg('pgb-backends-none');
|
|||
|
}
|
|||
|
else {
|
|||
|
add_unknown msg('no-match-db');
|
|||
|
}
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
my $percent = (int $total / $limit*100) || 1;
|
|||
|
my $msg = msg('pgb-backends-msg', $total, $limit, $percent);
|
|||
|
my $ok = 1;
|
|||
|
|
|||
|
if ($e1) { ## minus
|
|||
|
$ok = 0 if $limit-$total <= $e2;
|
|||
|
}
|
|||
|
elsif ($e3) { ## percent
|
|||
|
my $nowpercent = $total/$limit*100;
|
|||
|
$ok = 0 if $nowpercent >= $e2;
|
|||
|
}
|
|||
|
else { ## raw number
|
|||
|
$ok = 0 if $total >= $e2;
|
|||
|
}
|
|||
|
if (!$ok) {
|
|||
|
add_critical $msg;
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
if ($w1) {
|
|||
|
$ok = 0 if $limit-$total <= $w2;
|
|||
|
}
|
|||
|
elsif ($w3) {
|
|||
|
my $nowpercent = $total/$limit*100;
|
|||
|
$ok = 0 if $nowpercent >= $w2;
|
|||
|
}
|
|||
|
else {
|
|||
|
$ok = 0 if $total >= $w2;
|
|||
|
}
|
|||
|
if (!$ok) {
|
|||
|
add_warning $msg;
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
add_ok $msg;
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_pgbouncer_backends
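
## A minimal illustrative sketch (never called by this script): how the three
## threshold forms accepted above map to an absolute connection count, given
## max_client_conn. The helper name is an assumption; the arithmetic mirrors
## check_pgbouncer_backends.
sub _example_pgbouncer_limit {
    my ($spec, $max_client_conn) = @_;            ## e.g. ('95%', 100)
    my ($minus, $number, $percent) = $spec =~ /^(\-?)(\d+)(\%?)$/
        or return;
    return $max_client_conn - $number if $minus;  ## '-5'  -> complain at 95 of 100
    return int($number * $max_client_conn / 100) if $percent;  ## '95%' -> 95
    return $number;                               ## '12'  -> complain at 12
}
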
sub check_pgb_pool {
|
|||
|
|
|||
|
    # Check various bits of the pgbouncer SHOW POOLS output
|
|||
|
my $stat = shift;
|
|||
|
my ($warning, $critical) = validate_range({type => 'positive integer'});
|
|||
|
|
|||
|
$SQL = 'SHOW POOLS';
|
|||
|
my $info = run_command($SQL, { regex => qr[$stat] });
|
|||
|
|
|||
|
$db = $info->{db}[0];
|
|||
|
my $output = $db->{slurp};
|
|||
|
my $gotone = 0;
|
|||
|
for my $i (@$output) {
|
|||
|
next if skip_item($i->{database});
|
|||
|
my $msg = "$i->{database}=$i->{$stat}";
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
$stats{$i->{database}} = $i->{$stat};
|
|||
|
$statsmsg{$i->{database}} = msg('pgbouncer-pool', $i->{database}, $stat, $i->{$stat});
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
if ($critical and $i->{$stat} >= $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif ($warning and $i->{$stat} >= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_pgb_pool
|
|||
|
|
|||
|
|
|||
|
sub check_prepared_txns {
|
|||
|
|
|||
|
## Checks age of prepared transactions
|
|||
|
## Most installations probably want no prepared_transactions
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'seconds',
|
|||
|
default_warning => '1',
|
|||
|
default_critical => '30',
|
|||
|
});
|
|||
|
|
|||
|
my $SQL = q{
|
|||
|
SELECT database, ROUND(EXTRACT(epoch FROM now()-prepared)) AS age, prepared
|
|||
|
FROM pg_prepared_xacts
|
|||
|
ORDER BY prepared ASC
|
|||
|
};
|
|||
|
|
|||
|
my $info = run_command($SQL, {regex => qr[\w+], emptyok => 1 } );
|
|||
|
|
|||
|
my $msg = msg('preptxn-none');
|
|||
|
my $found = 0;
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my (@crit,@warn,@ok);
|
|||
|
my ($maxage,$maxdb) = (0,''); ## used by MRTG only
|
|||
|
ROW: for my $r (@{$db->{slurp}}) {
|
|||
|
my ($dbname,$age,$date) = ($r->{database},$r->{age},$r->{prepared});
|
|||
|
$found = 1 if ! $found;
|
|||
|
next ROW if skip_item($dbname);
|
|||
|
$found = 2;
|
|||
|
if ($MRTG) {
|
|||
|
if ($age > $maxage) {
|
|||
|
$maxdb = $dbname;
|
|||
|
$maxage = $age;
|
|||
|
}
|
|||
|
elsif ($age == $maxage) {
|
|||
|
$maxdb .= sprintf "%s$dbname", length $maxdb ? ' | ' : '';
|
|||
|
}
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
$msg = "$dbname=$date ($age)";
|
|||
|
$db->{perf} .= sprintf ' %s=%ss;%s;%s',
|
|||
|
perfname($dbname), $age, $warning, $critical;
|
|||
|
if (length $critical and $age >= $critical) {
|
|||
|
push @crit => $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $age >= $warning) {
|
|||
|
push @warn => $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
push @ok => $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $maxage, msg => $maxdb});
|
|||
|
}
|
|||
|
elsif (0 == $found) {
|
|||
|
add_ok msg('preptxn-none');
|
|||
|
}
|
|||
|
elsif (1 == $found) {
|
|||
|
add_unknown msg('no-match-db');
|
|||
|
}
|
|||
|
elsif (@crit) {
|
|||
|
add_critical join ' ' => @crit;
|
|||
|
}
|
|||
|
elsif (@warn) {
|
|||
|
add_warning join ' ' => @warn;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok join ' ' => @ok;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_prepared_txns
|
|||
|
|
|||
|
|
|||
|
sub check_query_runtime {
|
|||
|
|
|||
|
## Make sure a known query runs at least as fast as we think it should
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Warning and critical are time limits, defaulting to seconds
|
|||
|
## Valid units: s[econd], m[inute], h[our], d[ay]
|
|||
|
## Does a "EXPLAIN ANALYZE SELECT COUNT(1) FROM xyz"
|
|||
|
## where xyz is given by the option --queryname
|
|||
|
    ## This could also be a table or a function, of course, but it must be a
    ## single word. If a function, it must take no arguments (written with "()")
|
|||
|
## Examples:
|
|||
|
## --warning="100s" --critical="120s" --queryname="speedtest1"
|
|||
|
## --warning="5min" --critical="15min" --queryname="speedtest()"
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'time'});
|
|||
|
|
|||
|
my $queryname = $opt{queryname} || '';
|
|||
|
|
|||
|
if ($queryname !~ /^[\w\_\.]+(?:\(\))?$/) {
|
|||
|
ndie msg('runtime-badname');
|
|||
|
}
|
|||
|
|
|||
|
$SQL = "EXPLAIN ANALYZE SELECT COUNT(1) FROM $queryname";
|
|||
|
my $info = run_command($SQL);
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
if (! exists $db->{slurp}[0]{queryplan}) {
|
|||
|
add_unknown msg('invalid-query', $db->{slurp});
|
|||
|
next;
|
|||
|
}
|
|||
|
my $totalms = -1;
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
if ($r->{queryplan} =~ / (\d+\.\d+) ms/) {
|
|||
|
$totalms = $1;
|
|||
|
}
|
|||
|
}
|
|||
|
my $totalseconds = sprintf '%.2f', $totalms / 1000.0;
|
|||
|
if ($MRTG) {
|
|||
|
$stats{$db->{dbname}} = $totalseconds;
|
|||
|
next;
|
|||
|
}
|
|||
|
$db->{perf} = sprintf '%s=%ss;%s;%s',
|
|||
|
perfname(msg('query-time')), $totalseconds, $warning, $critical;
|
|||
|
my $msg = msg('runtime-msg', $totalseconds);
|
|||
|
if (length $critical and $totalseconds >= $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $totalseconds >= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
$MRTG and do_mrtg_stats(msg('runtime-badmrtg'));
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_query_runtime
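
## A minimal illustrative sketch (never called by this script): how the EXPLAIN
## ANALYZE output is reduced to a runtime in seconds by check_query_runtime
## above. The sample plan line and helper name are assumptions; the regex and
## the ms-to-seconds conversion mirror the code above.
sub _example_explain_runtime_seconds {
    my $planline = shift || 'Total runtime: 1234.567 ms';
    if ($planline =~ / (\d+\.\d+) ms/) {
        return sprintf '%.2f', $1 / 1000.0;    ## '1234.567 ms' -> '1.23'
    }
    return;
}
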
sub check_query_time {

    ## Check the length of running queries

    check_txn_idle('qtime',
                   msg('queries'),
                   msg('query-time'),
                   'query_start',
                   q{query_start IS NOT NULL AND current_query NOT LIKE '<IDLE>%'});

    return;

} ## end of check_query_time


sub check_relation_size {
|
|||
|
|
|||
|
my $relkind = shift || 'relation';
|
|||
|
|
|||
|
## Check the size of one or more relations
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## By default, checks all relations
|
|||
|
## Can check specific one(s) with include
|
|||
|
## Can ignore some with exclude
|
|||
|
## Warning and critical are bytes
|
|||
|
## Valid units: b, k, m, g, t, e
|
|||
|
    ## All above may be written as plural or with a trailing 'b'
|
|||
|
## Limit to a specific user (relation owner) with the includeuser option
|
|||
|
## Exclude users with the excludeuser option
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'size'});
|
|||
|
|
|||
|
$SQL = sprintf q{
|
|||
|
SELECT pg_relation_size(c.oid) AS rsize,
|
|||
|
pg_size_pretty(pg_relation_size(c.oid)) AS psize,
|
|||
|
relkind, relname, nspname
|
|||
|
FROM pg_class c, pg_namespace n WHERE (relkind = %s) AND n.oid = c.relnamespace
|
|||
|
},
|
|||
|
$relkind eq 'table' ? q{'r'}
|
|||
|
: $relkind eq 'index' ? q{'i'}
|
|||
|
: q{'r' OR relkind = 'i'};
|
|||
|
|
|||
|
if ($opt{perflimit}) {
|
|||
|
$SQL .= " ORDER BY 1 DESC LIMIT $opt{perflimit}";
|
|||
|
}
|
|||
|
|
|||
|
if ($USERWHERECLAUSE) {
|
|||
|
$SQL =~ s/ WHERE/, pg_user u WHERE u.usesysid=c.relowner$USERWHERECLAUSE AND/;
|
|||
|
}
|
|||
|
|
|||
|
my $info = run_command($SQL, {emptyok => 1});
|
|||
|
|
|||
|
my $found = 0;
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
|
|||
|
$found = 1;
|
|||
|
if ($db->{slurp}[0]{rsize} !~ /\d/ and $USERWHERECLAUSE) {
|
|||
|
$stats{$db->{dbname}} = 0;
|
|||
|
add_ok msg('no-match-user');
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
my ($max,$pmax,$kmax,$nmax,$smax) = (-1,0,0,'?','?');
|
|||
|
|
|||
|
ROW: for my $r (@{$db->{slurp}}) {
|
|||
|
my ($size,$psize,$kind,$name,$schema) = @$r{qw/ rsize psize relkind relname nspname/};
|
|||
|
|
|||
|
next ROW if skip_item($name, $schema);
|
|||
|
|
|||
|
my $nicename = $kind eq 'r' ? "$schema.$name" : $name;
|
|||
|
|
|||
|
$db->{perf} .= sprintf '%s%s=%sB;%s;%s',
|
|||
|
$VERBOSE==1 ? "\n" : ' ',
|
|||
|
perfname($nicename), $size, $warning, $critical;
|
|||
|
($max=$size, $pmax=$psize, $kmax=$kind, $nmax=$name, $smax=$schema) if $size > $max;
|
|||
|
}
|
|||
|
if ($max < 0) {
|
|||
|
add_unknown msg('no-match-rel');
|
|||
|
next;
|
|||
|
}
|
|||
|
if ($MRTG) {
|
|||
|
my $msg = sprintf 'DB: %s %s %s%s',
|
|||
|
$db->{dbname},
|
|||
|
$kmax eq 'i' ? 'INDEX:' : 'TABLE:',
|
|||
|
$kmax eq 'i' ? '' : "$smax.",
|
|||
|
$nmax;
|
|||
|
do_mrtg({one => $max, msg => $msg});
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
my $msg;
|
|||
|
if ($relkind eq 'relation') {
|
|||
|
if ($kmax eq 'r') {
|
|||
|
$msg = msg('relsize-msg-relt', "$smax.$nmax", $pmax);
|
|||
|
}
|
|||
|
else {
|
|||
|
$msg = msg('relsize-msg-reli', $nmax, $pmax);
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ($relkind eq 'table') {
|
|||
|
$msg = msg('relsize-msg-tab', "$smax.$nmax", $pmax);
|
|||
|
}
|
|||
|
else {
|
|||
|
$msg = msg('relsize-msg-ind', $nmax, $pmax);
|
|||
|
}
|
|||
|
if (length $critical and $max >= $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $max >= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_relation_size
|
|||
|
|
|||
|
|
|||
|
sub check_table_size {
    return check_relation_size('table');
}
sub check_index_size {
    return check_relation_size('index');
}


sub check_replicate_row {
|
|||
|
|
|||
|
    ## Make an update on one server, make sure it propagates to others
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Warning and critical are time to replicate to all slaves
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'time', leastone => 1, forcemrtg => 1});
|
|||
|
|
|||
|
if ($warning and $critical and $warning > $critical) {
|
|||
|
ndie msg('range-warnbig');
|
|||
|
}
|
|||
|
|
|||
|
if (!$opt{repinfo}) {
|
|||
|
ndie msg('rep-noarg');
|
|||
|
}
|
|||
|
my @repinfo = split /,/ => ($opt{repinfo} || '');
|
|||
|
if ($#repinfo != 5) {
|
|||
|
ndie msg('rep-badarg');
|
|||
|
}
|
|||
|
my ($table,$pk,$id,$col,$val1,$val2) = (@repinfo);
|
|||
|
|
|||
|
## Quote everything, just to be safe (e.g. columns named 'desc')
|
|||
|
$table = qq{"$table"};
|
|||
|
$pk = qq{"$pk"};
|
|||
|
$col = qq{"$col"};
|
|||
|
|
|||
|
if ($val1 eq $val2) {
|
|||
|
ndie msg('rep-duh');
|
|||
|
}
|
|||
|
|
|||
|
$SQL = qq{UPDATE $table SET $col = 'X' WHERE $pk = '$id'};
|
|||
|
(my $update1 = $SQL) =~ s/X/$val1/;
|
|||
|
(my $update2 = $SQL) =~ s/X/$val2/;
|
|||
|
my $select = qq{SELECT $col AS c FROM $table WHERE $pk = '$id'};
|
|||
|
|
|||
|
## Are they the same on both sides? Must be yes, or we error out
|
|||
|
|
|||
|
## We assume this is a single server
|
|||
|
my $info1 = run_command($select);
|
|||
|
## Squirrel away the $db setting for later
|
|||
|
my $sourcedb = $info1->{db}[0];
|
|||
|
if (!defined $sourcedb) {
|
|||
|
ndie msg('rep-norow', "$table.$col");
|
|||
|
}
|
|||
|
my $value1 = $info1->{db}[0]{slurp}[0]{c};
|
|||
|
|
|||
|
my $info2 = run_command($select, { dbnumber => 2 });
|
|||
|
my $slave = 0;
|
|||
|
for my $d (@{$info2->{db}}) {
|
|||
|
$slave++;
|
|||
|
my $value2 = $d->{slurp}[0]{c};
|
|||
|
if ($value1 ne $value2) {
|
|||
|
ndie msg('rep-notsame');
|
|||
|
}
|
|||
|
}
|
|||
|
my $numslaves = $slave;
|
|||
|
if ($numslaves < 1) {
|
|||
|
ndie msg('rep-noslaves');
|
|||
|
}
|
|||
|
|
|||
|
my ($update,$newval);
|
|||
|
if ($value1 eq $val1) {
|
|||
|
$update = $update2;
|
|||
|
$newval = $val2;
|
|||
|
}
|
|||
|
elsif ($value1 eq $val2) {
|
|||
|
$update = $update1;
|
|||
|
$newval = $val1;
|
|||
|
}
|
|||
|
else {
|
|||
|
ndie msg('rep-wrongvals', $value1, $val1, $val2);
|
|||
|
}
|
|||
|
|
|||
|
$info1 = run_command($update, { dbnumber => 1, failok => 1 } );
|
|||
|
|
|||
|
## Make sure the update worked
|
|||
|
if (! defined $info1->{db}[0]) {
|
|||
|
ndie msg('rep-sourcefail');
|
|||
|
}
|
|||
|
|
|||
|
my $err = $info1->{db}[0]{error} || '';
|
|||
|
if ($err) {
|
|||
|
$err =~ s/ERROR://; ## e.g. Slony read-only
|
|||
|
ndie $err;
|
|||
|
}
|
|||
|
|
|||
|
## Start the clock
|
|||
|
my $starttime = time();
|
|||
|
|
|||
|
## Loop until we get a match, check each in turn
|
|||
|
my %slave;
|
|||
|
my $time = 0;
|
|||
|
LOOP: {
|
|||
|
$info2 = run_command($select, { dbnumber => 2 } );
|
|||
|
## Reset for final output
|
|||
|
$db = $sourcedb;
|
|||
|
|
|||
|
$slave = 0;
|
|||
|
for my $d (@{$info2->{db}}) {
|
|||
|
$slave++;
|
|||
|
next if exists $slave{$slave};
|
|||
|
my $value2 = $d->{slurp}[0]{c};
|
|||
|
$time = $db->{totaltime} = time - $starttime;
|
|||
|
if ($value2 eq $newval) {
|
|||
|
$slave{$slave} = $time;
|
|||
|
next;
|
|||
|
}
|
|||
|
if ($warning and $time > $warning) {
|
|||
|
$MRTG and do_mrtg({one => 0, msg => $time});
|
|||
|
add_warning msg('rep-fail', $slave);
|
|||
|
return;
|
|||
|
}
|
|||
|
elsif ($critical and $time > $critical) {
|
|||
|
$MRTG and do_mrtg({one => 0, msg => $time});
|
|||
|
add_critical msg('rep-fail', $slave);
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
## Did they all match?
|
|||
|
my $k = keys %slave;
|
|||
|
if (keys %slave >= $numslaves) {
|
|||
|
$MRTG and do_mrtg({one => $time});
|
|||
|
add_ok msg('rep-ok');
|
|||
|
return;
|
|||
|
}
|
|||
|
sleep 1;
|
|||
|
redo;
|
|||
|
}
|
|||
|
|
|||
|
$MRTG and ndie msg('rep-timeout', $time);
|
|||
|
add_unknown msg('rep-unknown');
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_replicate_row
|
|||
|
|
|||
|
|
|||
|
sub check_same_schema {
|
|||
|
|
|||
|
## Verify that all relations inside two or more databases are the same
|
|||
|
## Supports: Nagios
|
|||
|
## Include and exclude are supported
|
|||
|
## Warning and critical are not used
|
|||
|
## The filter argument is supported
|
|||
|
|
|||
|
## We override the usual $db->{totaltime} with our own counter
|
|||
|
my $start = [gettimeofday()];
|
|||
|
|
|||
|
## Check for filtering rules, then store inside opt{filtered}
|
|||
|
my %filter;
|
|||
|
if (exists $opt{filter}) {
|
|||
|
for my $item (@{ $opt{filter} }) {
|
|||
|
## Can separate by whitespace or commas
|
|||
|
for my $phrase (split /[\s,]+/ => $item) {
|
|||
|
|
|||
|
## Can be plain (e.g. nouser) or regex based exclusion, e.g. nouser=bob
|
|||
|
next if $phrase !~ /(\w+)=?\s*(.*)/o;
|
|||
|
my ($name,$regex) = (lc $1,$2||'');
|
|||
|
|
|||
|
## Names are standardized with regards to plurals and casing
|
|||
|
$name =~ s/([aeiou])s$/$1/o;
|
|||
|
$name =~ s/s$//o;
|
|||
|
|
|||
|
if (! length $regex) {
|
|||
|
$filter{"$name"} = 1;
|
|||
|
}
|
|||
|
else {
|
|||
|
push @{$filter{"${name}_regex"}} => $regex;
|
|||
|
}
|
|||
|
}
|
|||
|
$VERBOSE >= 3 and warn Dumper \%filter;
|
|||
|
}
|
|||
|
}
|
|||
|
$opt{filtered} = \%filter;
|
|||
|
|
|||
|
## See how many databases we are using
|
|||
|
my $numdbs = @targetdb;
|
|||
|
$VERBOSE >= 3 and warn "Number of databases is $numdbs\n";
|
|||
|
|
|||
|
## If only a single database is given, this is a time-based comparison
|
|||
|
## In other words, we write and read a local file
|
|||
|
my $samedb = 0;
|
|||
|
if (1 == $numdbs) {
|
|||
|
$samedb = 1;
|
|||
|
$numdbs = 2;
|
|||
|
}
|
|||
|
|
|||
|
## Sanity check
|
|||
|
if ($opt{suffix} and ! $samedb) {
|
|||
|
ndie msg('ss-suffix');
|
|||
|
}
|
|||
|
|
|||
|
## Version information about each database, by number
|
|||
|
my %dbver;
|
|||
|
|
|||
|
## Verify we can connect to each database, and grab version information
|
|||
|
for my $num (1..$numdbs) {
|
|||
|
|
|||
|
## No need to check the same database twice!
|
|||
|
last if $samedb and $num > 1;
|
|||
|
|
|||
|
$SQL = 'SELECT version()';
|
|||
|
my $info = run_command($SQL, { dbnumber => $num } );
|
|||
|
|
|||
|
## We need a global $db, so we'll use the first database
|
|||
|
$db = $info->{db}[0] if 1 == $num;
|
|||
|
|
|||
|
my $foo = $info->{db}[0];
|
|||
|
my $version = $foo->{slurp}[0]{version};
|
|||
|
$version =~ /\D+(\d+\.\d+)(\S+)/i or die qq{Invalid version: $version\n};
|
|||
|
my ($full,$major,$revision) = ("$1$2",$1,$2);
|
|||
|
$revision =~ s/^\.//;
|
|||
|
$dbver{$num} = {
|
|||
|
full => $version,
|
|||
|
version => $full,
|
|||
|
major => $major,
|
|||
|
revision => $revision,
|
|||
|
};
|
|||
|
|
|||
|
$targetdb[$num-1]{pgversion} = $full;
|
|||
|
|
|||
|
}
|
|||
|
|
|||
|
## An ordered list of all the things we check.
|
|||
|
## Order is important here, as when reporting, some things
|
|||
|
## can mask reporting on others (e.g. no need to report missing tables
|
|||
|
## if the entire schema has already been reported as missing)
|
|||
|
## We also indicate which columns should be ignored when comparing,
|
|||
|
## as well as which columns are of a 'list' nature
|
|||
|
my @catalog_items = (
|
|||
|
[user => 'usesysid', 'useconfig' ],
|
|||
|
[language => 'laninline,lanplcallfoid,lanvalidator', '' ],
|
|||
|
[operator => '', '' ],
|
|||
|
[type => '', '' ],
|
|||
|
[schema => '', '' ],
|
|||
|
[function => 'source_checksum,prolang,prorettype', '' ],
|
|||
|
[table => 'reltype,relfrozenxid,relpages,
|
|||
|
reltuples,relnatts', '' ],
|
|||
|
[view => 'reltype', '' ],
|
|||
|
[sequence => 'reltype,log_cnt,relnatts,is_called', '' ],
|
|||
|
[index => 'relpages,reltuples,indpred,indclass,
|
|||
|
indexprs,indcheckxmin', '' ],
|
|||
|
[trigger => '', '' ],
|
|||
|
[constraint => 'conbin', '' ],
|
|||
|
[column => 'atttypid,attnum,attbyval', '' ],
|
|||
|
);
|
|||
|
|
|||
|
## Where we store all the information, per-database
|
|||
|
my %thing;
|
|||
|
|
|||
|
my $saved_db;
|
|||
|
for my $x (1..$numdbs) {
|
|||
|
|
|||
|
if ($x > 1 and $samedb) {
|
|||
|
## This means we are looking at a single database over time
|
|||
|
## We load the stored information into the current $dbinfo
|
|||
|
my $filename = audit_filename();
|
|||
|
|
|||
|
if (! -e $filename) {
|
|||
|
## We've not saved any information about this database yet
|
|||
|
## Store the info and exit!
|
|||
|
my $version = $dbver{1}{version};
|
|||
|
write_audit_file({ file => $filename, 'same_schema' => 1,
|
|||
|
info => $thing{1}, pgversion => $version });
|
|||
|
print msg('ss-createfile', $filename) . "\n";
|
|||
|
exit 0;
|
|||
|
}
|
|||
|
|
|||
|
## Meta-information from the file
|
|||
|
my ($conninfo,$ctime,$cversion,$pgversion,$cdbname,$chost,$cport,$cuser);
|
|||
|
|
|||
|
($thing{$x},$conninfo,$ctime,$cversion,$pgversion,$cdbname,$chost,$cport,$cuser)
|
|||
|
= read_audit_file($filename);
|
|||
|
|
|||
|
## Count total objects
|
|||
|
my $totalcount = 0;
|
|||
|
for (keys %{ $thing{$x} }) {
|
|||
|
$totalcount += keys %{ $thing{$x}{$_} };
|
|||
|
}
|
|||
|
|
|||
|
## Add the meta info back into the targetdb
|
|||
|
push @targetdb, {
|
|||
|
filename => $filename,
|
|||
|
conninfo => $conninfo,
|
|||
|
ctime => $ctime,
|
|||
|
cversion => $cversion,
|
|||
|
dbname => $cdbname,
|
|||
|
port => $cport,
|
|||
|
host => $chost,
|
|||
|
dbuser => $cuser,
|
|||
|
pgversion => $pgversion,
|
|||
|
objects => $totalcount,
|
|||
|
};
|
|||
|
|
|||
|
next;
|
|||
|
|
|||
|
} ## end if samedb
|
|||
|
|
|||
|
## Hash of this round's information
|
|||
|
my $dbinfo;
|
|||
|
|
|||
|
for (@catalog_items) {
|
|||
|
my $name = $_->[0];
|
|||
|
$dbinfo->{$name} = find_catalog_info($name, $x, $dbver{$x});
|
|||
|
}
|
|||
|
|
|||
|
## TODO:
|
|||
|
## operator class, cast, aggregate, conversion, domain, tablespace, foreign tables
|
|||
|
## foreign server, wrapper, collation, extensions, roles?
|
|||
|
|
|||
|
## Map the oid back to the user, for ease later on
|
|||
|
for my $row (values %{ $dbinfo->{user} }) {
|
|||
|
$dbinfo->{useroid}{$row->{usesysid}} = $row->{usename};
|
|||
|
}
|
|||
|
|
|||
|
$thing{$x} = $dbinfo;
|
|||
|
|
|||
|
## Count total objects
|
|||
|
my $totalcount = 0;
|
|||
|
for (keys %{ $thing{$x} }) {
|
|||
|
$totalcount += keys %{ $thing{$x}{$_} };
|
|||
|
}
|
|||
|
|
|||
|
$targetdb[$x-1]{objects} = $totalcount;
|
|||
|
|
|||
|
|
|||
|
} ## end each database to query
|
|||
|
|
|||
|
## Start comparing, and put any differences into %fail
|
|||
|
my %fail;
|
|||
|
|
|||
|
## Ugly, but going to use this as a global for the subroutines below:
|
|||
|
$opt{failcount} = 0;
|
|||
|
|
|||
|
## Simple checks that items exist on each database
|
|||
|
for (@catalog_items) {
|
|||
|
my $name = $_->[0];
|
|||
|
$fail{$name}{exists} = schema_item_exists($name, \%thing);
|
|||
|
}
|
|||
|
|
|||
|
## Now check for some more specific items for each item class.
|
|||
|
## For many of these, we want to compare all columns except for
|
|||
|
## certain known exceptions (e.g. anything oid-based)
|
|||
|
## Because we may go across versions, if the column does not exist
|
|||
|
## somewhere, it is simply silently ignored
|
|||
|
## Some items are lists (e.g. acls) and must be treated differently
|
|||
|
|
|||
|
for (@catalog_items) {
|
|||
|
my ($name,$ignore,$lists) = @$_;
|
|||
|
$fail{$name}{diff} = schema_item_differences({
|
|||
|
items => \%thing,
|
|||
|
name => $name,
|
|||
|
ignore => $ignore,
|
|||
|
lists => $lists,
|
|||
|
});
|
|||
|
}
|
|||
|
|
|||
|
## Remove empty hashes for a cleaner debug dump
|
|||
|
for (keys %fail) {
|
|||
|
if (exists $fail{$_}{diff} and ! keys %{ $fail{$_}{diff} }) {
|
|||
|
delete $fail{$_}{diff};
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Set the total time
|
|||
|
$db->{totaltime} = sprintf '%.2f', tv_interval($start);
|
|||
|
|
|||
|
## Before we output any results, rewrite the audit file if needed
|
|||
|
## We do this if we are reading from a saved file,
|
|||
|
## and the "replace" argument is set
|
|||
|
if ($samedb and $opt{replace}) {
|
|||
|
my $filename = audit_filename();
|
|||
|
if ( -e $filename) {
|
|||
|
## Move the old one to a backup version
|
|||
|
my $backupfile = "$filename.backup";
|
|||
|
rename $filename, $backupfile;
|
|||
|
}
|
|||
|
my $version = $dbver{1}{version};
|
|||
|
write_audit_file({ file => $filename, 'same_schema' => 1,
|
|||
|
info => $thing{1}, pgversion => $version });
|
|||
|
## Cannot print this message as we are outputting Nagios stuff
|
|||
|
#print msg('ss-createfile', $filename) . "\n";
|
|||
|
}
|
|||
|
|
|||
|
## Comparison is done, let's report the results
|
|||
|
if (! $opt{failcount}) {
|
|||
|
add_ok msg('ss-matched');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## Build a pretty message giving all the gory details
|
|||
|
my $msg = '';
|
|||
|
|
|||
|
## Adjust the output based on the leading message sizes
|
|||
|
my $maxsize = 1;
|
|||
|
my $msg_exists = msg('ss-existson');
|
|||
|
my $msg_missing = msg('ss-missingon');
|
|||
|
$maxsize = length $msg_exists if length $msg_exists > $maxsize;
|
|||
|
$maxsize = length $msg_missing if length $msg_missing > $maxsize;
|
|||
|
|
|||
|
## Walk through each item type in alphabetical order and output the differences
|
|||
|
for (@catalog_items) {
|
|||
|
my $item = $_->[0];
|
|||
|
|
|||
|
## Pretty name for this type of item. No matches is okay!
|
|||
|
$opt{nomsgok} = 1;
|
|||
|
my $pitem = msg($item) || ucfirst $item;
|
|||
|
$opt{nomsgok} = 0;
|
|||
|
|
|||
|
## See if there are any items of this class that only exist on some
|
|||
|
my $e = $fail{$item}{exists};
|
|||
|
if (keys %$e) {
|
|||
|
for my $name (sort keys %$e) {
|
|||
|
## We do not want to report twice on things that appear inside of schemas
|
|||
|
## However, we do report if the schema *does* exist on any of the missing databases
|
|||
|
if ($item ne 'schema' and $name =~ /(.+?)\./) {
|
|||
|
my $schema = $1;
|
|||
|
## How many databases do not have this?
|
|||
|
my $missingcount = keys %{ $e->{$name}{nothere} };
|
|||
|
my $noschemacount = 0;
|
|||
|
for my $db (keys %{ $e->{$name}{nothere} }) {
|
|||
|
if (exists $fail{schema}{exists}{$schema}{nothere}{$db}) {
|
|||
|
$noschemacount++;
|
|||
|
}
|
|||
|
}
|
|||
|
if ($missingcount == $noschemacount) {
|
|||
|
next;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Show the list of the item, and a CSV of which databases have it and which don't
|
|||
|
my $isthere = join ', ' => sort { $a<=>$b } keys %{ $e->{$name}{isthere} };
|
|||
|
my $nothere = join ', ' => sort { $a<=>$b } keys %{ $e->{$name}{nothere} };
|
|||
|
$msg .= sprintf "%s\n %-*s %s\n %-*s %s\n",
|
|||
|
msg('ss-noexist', $pitem, $name),
|
|||
|
$maxsize, $msg_exists,
|
|||
|
$isthere,
|
|||
|
$maxsize, $msg_missing,
|
|||
|
$nothere;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## See if there are any items for this class that have differences
|
|||
|
my $d = $fail{$item}{diff};
|
|||
|
if (keys %$d) {
|
|||
|
|
|||
|
for my $name (sort keys %$d) {
|
|||
|
my $tdiff = $d->{$name};
|
|||
|
|
|||
|
## Any raw column differences?
|
|||
|
if (exists $tdiff->{coldiff}) {
|
|||
|
my @msg;
|
|||
|
|
|||
|
for my $col (sort keys %{ $tdiff->{coldiff} }) {
|
|||
|
|
|||
|
## Do not show index 'owners': already covered by the table itself
|
|||
|
if ($col eq 'owner' and $item eq 'index') {
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
## Do not show column number differences if filtered out with "noposition"
|
|||
|
if ($item eq 'column'
|
|||
|
and $col eq 'column_number'
|
|||
|
and $opt{filtered}{noposition}) {
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
## Do not show function body differences if filtered out with "nofuncbody"
|
|||
|
## Also skip if one value is a dash and the other is empty (treated as equivalent)
|
|||
|
if ($item eq 'function'
|
|||
|
and $col eq 'prosrc') {
|
|||
|
|
|||
|
next if $opt{filtered}{nofuncbody};
|
|||
|
my ($one,$two);
|
|||
|
for my $db (sort keys %{ $tdiff->{coldiff}{$col} }) {
|
|||
|
if (defined $one) {
|
|||
|
$two = $tdiff->{coldiff}{$col}{$db};
|
|||
|
}
|
|||
|
else {
|
|||
|
$one = $tdiff->{coldiff}{$col}{$db};
|
|||
|
}
|
|||
|
}
|
|||
|
next if $one eq '-' and $two eq '';
|
|||
|
next if $one eq '' and $two eq '-';
|
|||
|
}
|
|||
|
|
|||
|
## If we are doing a historical comparison, skip some items
|
|||
|
if ($samedb) {
|
|||
|
if ($item eq 'sequence'
|
|||
|
and $col eq 'last_value') {
|
|||
|
next;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
push @msg => sprintf " %s\n", msg('ss-different', $col);
|
|||
|
for my $db (sort keys %{ $tdiff->{coldiff}{$col} }) {
|
|||
|
push @msg => sprintf " %s %s: %s\n",
|
|||
|
ucfirst msg('database'),
|
|||
|
$db,
|
|||
|
$tdiff->{coldiff}{$col}{$db};
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (@msg) {
|
|||
|
$msg .= qq{$pitem "$name":\n};
|
|||
|
$msg .= $_ for @msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
## Everything got filtered out, so decrement this item
|
|||
|
$opt{failcount}--;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Any multi-item column differences?
|
|||
|
if (exists $tdiff->{list}) {
|
|||
|
|
|||
|
my @msg;
|
|||
|
for my $col (sort keys %{ $tdiff->{list} }) {
|
|||
|
|
|||
|
## Exclude permissions if 'noperm' filter is set
|
|||
|
if ($col =~ /.acl$/ and $opt{filtered}{noperm}) {
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
if (exists $tdiff->{list}{$col}{exists}) {
|
|||
|
my $ex = $tdiff->{list}{$col}{exists};
|
|||
|
for my $name (sort keys %$ex) {
|
|||
|
push @msg => sprintf qq{ "%s":\n %s\n},
|
|||
|
$col,
|
|||
|
msg('ss-notset', $name);
|
|||
|
my $isthere = join ', ' => sort { $a<=>$b } keys %{ $ex->{$name}{isthere} };
|
|||
|
my $nothere = join ', ' => sort { $a<=>$b } keys %{ $ex->{$name}{nothere} };
|
|||
|
push @msg => sprintf " %-*s %s\n %-*s %s\n",
|
|||
|
$maxsize, $msg_exists,
|
|||
|
$isthere,
|
|||
|
$maxsize, $msg_missing,
|
|||
|
$nothere;
|
|||
|
}
|
|||
|
}
|
|||
|
if (exists $tdiff->{list}{$col}{diff}) {
|
|||
|
for my $setting (sort keys %{ $tdiff->{list}{$col}{diff} }) {
|
|||
|
|
|||
|
push @msg => sprintf qq{ "%s":\n %s\n},
|
|||
|
$col,
|
|||
|
msg('ss-different', $setting);
|
|||
|
for my $db (sort keys %{ $tdiff->{list}{$col}{diff}{$setting} }) {
|
|||
|
my $val = $tdiff->{list}{$col}{diff}{$setting}{$db};
|
|||
|
push @msg => sprintf " %s %s: %s\n",
|
|||
|
ucfirst msg('database'),
|
|||
|
$db,
|
|||
|
$val;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (@msg) {
|
|||
|
$msg .= qq{$pitem "$name":\n};
|
|||
|
$msg .= $_ for @msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
## No message means it was all filtered out, so we decrement the master count
|
|||
|
$opt{failcount}--;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## We may have no items due to exclusions!
|
|||
|
if (! $opt{failcount}) {
|
|||
|
add_ok msg('ss-matched');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
$db->{perf} = "\n$msg";
|
|||
|
add_critical msg('ss-failed', $opt{failcount});
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_same_schema
|
|||
|
|
|||
|
|
|||
|
sub audit_filename {
|
|||
|
|
|||
|
## Generate the name of the file to store audit information
|
|||
|
|
|||
|
## Get the connection information for this connection
|
|||
|
my $filename = run_command('foo', { conninfo => 1 });
|
|||
|
## Do not care about the username
|
|||
|
$filename =~ s/ user=(.+)//;
|
|||
|
## Strip out the host if not used
|
|||
|
$filename =~ s/ host=<none>//;
|
|||
|
## Replace any other spaces
|
|||
|
$filename =~ s/ /./g;
|
|||
|
## Equals have to be escaped, so we'll change them to a dot
|
|||
|
$filename =~ s/=/./g;
|
|||
|
## The final filename to use
|
|||
|
$filename = "check_postgres.audit.$filename";
|
|||
|
|
|||
|
## The host name may have slashes, so change to underscores
|
|||
|
$filename =~ s{\/}{_}g;
|
|||
|
|
|||
|
## Got a user-supplied extension? Add it now.
|
|||
|
if ($opt{suffix}) {
|
|||
|
$filename .= ".$opt{suffix}";
|
|||
|
}
|
|||
|
|
|||
|
return $filename;
|
|||
|
|
|||
|
} ## end of audit_filename
|
|||
|
|
|||
|
|
|||
|
sub write_audit_file {
|
|||
|
|
|||
|
## Save a new copy of the audit file
|
|||
|
my $arg = shift || {};
|
|||
|
my $filename = $arg->{file} || $arg->{filename} || audit_filename();
|
|||
|
my $info = $arg->{info} || die;
|
|||
|
|
|||
|
## Create a connection information string
|
|||
|
my $row = $targetdb[0];
|
|||
|
my $conninfo = sprintf '%s%s%s%s',
|
|||
|
defined $row->{port} ? qq{port=$row->{port} } : '',
|
|||
|
defined $row->{host} ? qq{host=$row->{host} } : '',
|
|||
|
defined $row->{dbname} ? qq{dbname=$row->{dbname} } : '',
|
|||
|
defined $row->{dbuser} ? qq{user=$row->{dbuser} } : '';
|
|||
|
|
|||
|
open my $fh, '>', $filename or die qq{Could not open "$filename": $!\n};
|
|||
|
print {$fh} "## Audit file for check_postgres\n";
|
|||
|
print {$fh} "## CP version: $VERSION\n";
|
|||
|
print {$fh} "## PG version: $arg->{pgversion}\n";
|
|||
|
printf {$fh} "## Created: %s\n", scalar localtime();
|
|||
|
print {$fh} "## Connection: $conninfo\n";
|
|||
|
print {$fh} "## Database name: $row->{dbname}\n";
|
|||
|
print {$fh} "## Host: $row->{host}\n";
|
|||
|
print {$fh} "## Port: $row->{port}\n";
|
|||
|
print {$fh} "## User: $row->{dbuser}\n";
|
|||
|
if ($arg->{same_schema}) {
|
|||
|
print {$fh} "## Start of same_schema information:\n";
|
|||
|
{
|
|||
|
local $Data::Dumper::Indent = 1;
|
|||
|
print {$fh} Dumper $info;
|
|||
|
}
|
|||
|
print {$fh} "## End of same_schema information\n";
|
|||
|
}
|
|||
|
|
|||
|
close $fh or warn qq{Could not close "$filename": $!\n};
|
|||
|
|
|||
|
} ## end of write_audit_file
|
|||
|
|
|||
|
|
|||
|
sub read_audit_file {
|
|||
|
|
|||
|
## Read in the data from a historical file
|
|||
|
## Returns nine items:
## 1. The standard catalog structure that was saved
## 2. Connection information string
## 3. Date the file was created
## 4. The version of this script it was created with
## 5. The Postgres version it was created against
## 6. The database name
## 7. The host
## 8. The port
## 9. The user
|
|||
|
|
|||
|
my $filename = shift;
|
|||
|
|
|||
|
open my $fh, '<', $filename or die qq{Could not open "$filename": $!\n};
|
|||
|
my $inside = 0;
|
|||
|
my $data = '';
|
|||
|
my ($conninfo,$ctime,$cversion,$pgversion) = ('???','???','???','???');
|
|||
|
my ($cdbname,$chost,$cport,$cuser) = ('???','???','???','???');
|
|||
|
while (<$fh>) {
|
|||
|
if (!$inside) {
|
|||
|
if (/Start of same_schema/) {
|
|||
|
$inside = 1;
|
|||
|
}
|
|||
|
elsif (/CP version: (.+)/) {
|
|||
|
$cversion = $1;
|
|||
|
}
|
|||
|
elsif (/PG version: (.+)/) {
|
|||
|
$pgversion = $1;
|
|||
|
}
|
|||
|
elsif (/Created: (.+)/) {
|
|||
|
$ctime = $1;
|
|||
|
}
|
|||
|
elsif (/Connection: (.+)/) {
|
|||
|
$conninfo = $1;
|
|||
|
}
|
|||
|
elsif (/Database name: (.+)/) {
|
|||
|
$cdbname = $1;
|
|||
|
}
|
|||
|
elsif (/Host: (.+)/) {
|
|||
|
$chost = $1;
|
|||
|
}
|
|||
|
elsif (/Port: (.+)/) {
|
|||
|
$cport = $1;
|
|||
|
}
|
|||
|
elsif (/User: (.+)/) {
|
|||
|
$cuser = $1;
|
|||
|
}
|
|||
|
}
|
|||
|
elsif (/End of same_schema/) {
|
|||
|
last;
|
|||
|
}
|
|||
|
else {
|
|||
|
$data .= $_;
|
|||
|
}
|
|||
|
}
|
|||
|
close $fh or warn qq{Could not close "$filename": $!\n};
|
|||
|
|
|||
|
my $POSTGRES1;
|
|||
|
eval $data; ## no critic (ProhibitStringyEval)
|
|||
|
if ($@) {
|
|||
|
die qq{Failed to parse file "$filename": $@\n};
|
|||
|
}
|
|||
|
return $POSTGRES1, $conninfo, $ctime, $cversion,
|
|||
|
$pgversion, $cdbname, $chost, $cport, $cuser;
|
|||
|
|
|||
|
} ## end of read_audit_file
|
|||
|
|
|||
|
|
|||
|
sub schema_item_exists {
|
|||
|
|
|||
|
## Compare a certain class of items across all databases for existence
|
|||
|
## Returns a hashref of item names, with "isthere" and "nothere"
|
|||
|
## with keys of database numbers underneath that
|
|||
|
|
|||
|
my $item_class = shift;
|
|||
|
my $itemhash = shift;
|
|||
|
|
|||
|
## Things that failed to match:
|
|||
|
my %nomatch;
|
|||
|
|
|||
|
my $key = "no${item_class}_regex";
|
|||
|
my $exclude_regex = exists $opt{filtered}->{$key} ? $opt{filtered}->{$key} : [];
|
|||
|
|
|||
|
for my $db1 (sort keys %$itemhash) {
|
|||
|
for my $db2 (sort keys %$itemhash) {
|
|||
|
next if $db1 == $db2;
|
|||
|
for my $name (sort keys %{ $itemhash->{$db1}{$item_class} }) {
|
|||
|
|
|||
|
## Can exclude by 'filter' based regex
|
|||
|
next if grep { $name eq $_ } @$exclude_regex;
|
|||
|
|
|||
|
if (! exists $itemhash->{$db2}{$item_class}{$name}) {
|
|||
|
|
|||
|
## Special exception for columns: do not add if the table is non-existent
|
|||
|
if ($item_class eq 'column') {
|
|||
|
(my $tablename = $name) =~ s/(.+)\..+/$1/;
|
|||
|
next if ! exists $itemhash->{$db2}{table}{$tablename};
|
|||
|
}
|
|||
|
|
|||
|
## Special exception for triggers: do not add if the table is non-existent
|
|||
|
if ($item_class eq 'trigger') {
|
|||
|
my $it = $itemhash->{$db1}{$item_class}{$name};
|
|||
|
my $tablename = "$it->{tschema}.$it->{tname}";
|
|||
|
next if ! exists $itemhash->{$db2}{table}{$tablename};
|
|||
|
}
|
|||
|
|
|||
|
$nomatch{$name}{isthere}{$db1} = 1;
|
|||
|
$nomatch{$name}{nothere}{$db2} = 1;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Increment our fail count once per item mismatch
|
|||
|
$opt{failcount} += keys %nomatch;
|
|||
|
|
|||
|
return \%nomatch;
|
|||
|
|
|||
|
} ## end of schema_item_exists
|
|||
|
|
|||
|
|
|||
|
sub schema_item_differences {
|
|||
|
|
|||
|
## Compare a certain class of items across all databases for differences
|
|||
|
## Takes a hashref of arguments, including:
|
|||
|
## name: the item class name
|
|||
|
## items: the main hashref of all items
|
|||
|
## ignore: which fields to ignore. CSV
|
|||
|
## lists: which fields are lists. CSV
|
|||
|
## Modifies the global failure count by incrementing $opt{failcount}
## Returns a hashref of item names, with details as to the diffs therein
|
|||
|
|
|||
|
my $arg = shift;
|
|||
|
|
|||
|
my $item_class = $arg->{name} or die;
|
|||
|
my $itemhash = $arg->{items} or die;
|
|||
|
|
|||
|
## Things we completely ignore:
|
|||
|
my $ignore = { oid => 1 };
|
|||
|
if (exists $arg->{ignore}) {
|
|||
|
for my $item (split /\s*,\s*/ => $arg->{ignore}) {
|
|||
|
$ignore->{$item} = 1;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Things that are handled as lists:
|
|||
|
my $lists = {};
|
|||
|
if (exists $arg->{lists}) {
|
|||
|
for my $item (split /\s*,\s*/ => $arg->{lists}) {
|
|||
|
$lists->{$item} = 1;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## The final lists of mismatched items we pass back
|
|||
|
my %nomatch;
|
|||
|
|
|||
|
my $key = "no${item_class}_regex";
|
|||
|
my $exclude_regex = exists $opt{filtered}->{$key} ? $opt{filtered}->{$key} : [];
|
|||
|
|
|||
|
for my $db1 (sort keys %$itemhash) {
|
|||
|
for my $db2 (sort keys %$itemhash) {
|
|||
|
next if $db1 >= $db2;
|
|||
|
for my $name (sort keys %{ $itemhash->{$db1}{$item_class} }) {
|
|||
|
|
|||
|
## Can exclude by 'filter' based regex
|
|||
|
next if grep { $name eq $_ } @$exclude_regex;
|
|||
|
|
|||
|
## This case has already been handled:
|
|||
|
next if ! exists $itemhash->{$db2}{$item_class}{$name};
|
|||
|
|
|||
|
## Special exception for columns: do not add if the table is non-existent
|
|||
|
if ($item_class eq 'column') {
|
|||
|
(my $tablename = $name) =~ s/(.+)\..+/$1/;
|
|||
|
next if ! exists $itemhash->{$db2}{table}{$tablename};
|
|||
|
}
|
|||
|
|
|||
|
my $one = $itemhash->{$db1}{$item_class}{$name};
|
|||
|
my $two = $itemhash->{$db2}{$item_class}{$name};
|
|||
|
|
|||
|
for my $col (keys %$one) {
|
|||
|
|
|||
|
## Skip if this col is ignored
|
|||
|
next if exists $ignore->{$col};
|
|||
|
|
|||
|
## If it doesn't exist on the other, just ignore it
|
|||
|
next if ! exists $two->{$col};
|
|||
|
|
|||
|
## If they are the same, move on!
|
|||
|
next if $one->{$col} eq $two->{$col};
|
|||
|
|
|||
|
## Skip certain known numeric fields that have text versions:
|
|||
|
next if $col =~ /.(?:namespace|owner|filenode|oid|relid)$/;
|
|||
|
|
|||
|
## If not a list, just report on the exact match here and move on:
|
|||
|
if (! exists $lists->{$col} and $col !~ /.acl$/) {
|
|||
|
$nomatch{$name}{coldiff}{$col}{$db1} = $one->{$col};
|
|||
|
$nomatch{$name}{coldiff}{$col}{$db2} = $two->{$col};
|
|||
|
next;
|
|||
|
}
|
|||
|
|
|||
|
## This is a list, so we have to break it down to see if it is really different
|
|||
|
## May be empty or of the form {foo=bar,baz=yak}
|
|||
|
|
|||
|
my (%list1,%list2);
|
|||
|
my ($uno,$dos) = ($one->{$col}, $two->{$col});
|
|||
|
|
|||
|
if (length $uno) {
|
|||
|
die "Invalid list: $uno for db $db1:$name:$col\n" if $uno !~ /^{(.+)}$/;
|
|||
|
%list1 = map { /(.*)=(.+)/ or die "Invalid list: $uno"; $1,$2 } split /,/ => $1;
|
|||
|
}
|
|||
|
if (length $dos) {
|
|||
|
die "Invalid list: $dos for db $db2:$name:$col\n" if $dos !~ /^{(.+)}$/;
|
|||
|
%list2 = map { /(.*)=(.+)/ or die "Invalid list: $dos"; $1,$2 } split /,/ => $1;
|
|||
|
}
|
|||
|
|
|||
|
## Items in 1 but not 2?
|
|||
|
for my $setting (sort keys %list1) {
|
|||
|
if (! exists $list2{$setting}) {
|
|||
|
$nomatch{$name}{list}{$col}{exists}{$setting}{isthere}{$db1} = 1;
|
|||
|
$nomatch{$name}{list}{$col}{exists}{$setting}{nothere}{$db2} = 1;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Items in 2 but not 1? Value differences?
|
|||
|
for my $setting (sort keys %list2) {
|
|||
|
if (! exists $list1{$setting}) {
|
|||
|
$nomatch{$name}{list}{$col}{exists}{$setting}{isthere}{$db2} = 1;
|
|||
|
$nomatch{$name}{list}{$col}{exists}{$setting}{nothere}{$db1} = 1;
|
|||
|
}
|
|||
|
elsif ($list1{$setting} ne $list2{$setting}) {
|
|||
|
$nomatch{$name}{list}{$col}{diff}{$setting}{$db1} = $list1{$setting};
|
|||
|
$nomatch{$name}{list}{$col}{diff}{$setting}{$db2} = $list2{$setting};
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
$opt{failcount} += keys %nomatch;
|
|||
|
|
|||
|
return \%nomatch;
|
|||
|
|
|||
|
} ## end of schema_item_differences
|
|||
|
|
|||
|
|
|||
|
sub find_catalog_info {
|
|||
|
|
|||
|
## Grab information from one or more catalog tables
|
|||
|
## Convert into a happy hashref and return it
|
|||
|
## Arguments: three
|
|||
|
## 1. Type of object
|
|||
|
## 2. Database number
|
|||
|
## 3. Version information for the database
|
|||
|
## Returns: large hashref of information
|
|||
|
|
|||
|
## What type of catalog object this is
|
|||
|
my $type = shift;
|
|||
|
|
|||
|
## We must know about this type
|
|||
|
if (! exists $catalog_info{$type}) {
|
|||
|
die "Unknown type of '$type' sent to find_catalog_info";
|
|||
|
}
|
|||
|
my $ci = $catalog_info{$type};
|
|||
|
|
|||
|
## The final hashref of rows we return
|
|||
|
my $result = {};
|
|||
|
|
|||
|
## Do nothing if we are excluding this type of object entirely
|
|||
|
return $result if $opt{filtered}{"no$type"};
|
|||
|
|
|||
|
## Which database to run this against
|
|||
|
my $dbnum = shift or die;
|
|||
|
|
|||
|
## The version information
|
|||
|
my $dbver = shift or die;
|
|||
|
|
|||
|
## The SQL we use
|
|||
|
my $SQL = $ci->{SQL} or die "No SQL found for type '$type'\n";
|
|||
|
|
|||
|
## Switch to alternate SQL for different versions
|
|||
|
if ($type eq 'language') {
|
|||
|
if (int $dbver->{major} <= 8.2) {
|
|||
|
$SQL = $ci->{SQL2};
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (exists $ci->{exclude}) {
|
|||
|
if ('temp_schemas' eq $ci->{exclude}) {
|
|||
|
if (! $opt{filtered}{system}) {
|
|||
|
$SQL .= q{ WHERE nspname !~ '^pg_t'};
|
|||
|
}
|
|||
|
}
|
|||
|
elsif ('system' eq $ci->{exclude}) {
|
|||
|
if (! $opt{filtered}{system}) {
|
|||
|
$SQL .= sprintf
|
|||
|
q{ %s n.nspname !~ '^pg' AND n.nspname <> 'information_schema'},
|
|||
|
$SQL =~ /WHERE/ ? 'AND' : 'WHERE';
|
|||
|
}
|
|||
|
}
|
|||
|
else {
|
|||
|
die "Unknown exclude '$ci->{exclude}' called";
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Final wrapup
|
|||
|
if (exists $ci->{postSQL}) {
|
|||
|
$SQL .= " $ci->{postSQL}";
|
|||
|
}
|
|||
|
|
|||
|
## Send our SQL to the correct database via psql and grab the results
|
|||
|
my $info = run_command($SQL, { dbnumber => $dbnum });
|
|||
|
|
|||
|
## The row column we use as the main hash key
|
|||
|
my $key = $ci->{keyname} || 'name';
|
|||
|
|
|||
|
## Keep track of the actual column numbers
|
|||
|
my $last_table = '';
|
|||
|
my $colnum = 1;
|
|||
|
|
|||
|
## Only need to pull back the first and only db, so we can say [0] here
|
|||
|
for my $row (@{$info->{db}[0]{slurp}}) {
|
|||
|
|
|||
|
## Remove any information that should be deleted
|
|||
|
for ( @{$info->{deletecols}}) {
|
|||
|
delete $row->{$_};
|
|||
|
}
|
|||
|
|
|||
|
## Determine the name to use. For most things this is simply the passed in key
|
|||
|
my $name = $row->{$key};
|
|||
|
|
|||
|
## For a function, we also want to put the args into the name
|
|||
|
if ($type eq 'function') {
|
|||
|
## Grab all type mappings
|
|||
|
$SQL = 'SELECT oid, typname FROM pg_type';
|
|||
|
my %oid2type;
|
|||
|
my $tinfo = run_command($SQL, { dbnumber => $dbnum });
|
|||
|
for my $row (@{ $tinfo->{db}[0]{slurp} }) {
|
|||
|
$oid2type{$row->{oid}} = $row->{typname};
|
|||
|
}
|
|||
|
(my $args = $row->{proargtypes}) =~ s/(\d+)/$oid2type{$1}||$1/ge;
|
|||
|
$args =~ s/ /,/g;
|
|||
|
$args =~ s/int2/smallint/g;
|
|||
|
$args =~ s/int4/int/g;
|
|||
|
$args =~ s/int8/bigint/g;
|
|||
|
$name .= "($args)";
|
|||
|
|
|||
|
}
|
|||
|
|
|||
|
## For columns, reduce the attnum to a simpler canonical form without holes
|
|||
|
if ($type eq 'column') {
|
|||
|
if ($row->{tname} ne $last_table) {
|
|||
|
$last_table = $row->{tname};
|
|||
|
$colnum = 1;
|
|||
|
}
|
|||
|
$row->{column_number} = $colnum++;
|
|||
|
}
|
|||
|
|
|||
|
## Store this row into our result hash, using a good key
|
|||
|
$result->{$name} = $row;
|
|||
|
|
|||
|
## We may want to run additional SQL per row returned
|
|||
|
if (exists $ci->{innerSQL}) {
|
|||
|
(my $SQL2 = $ci->{innerSQL}) =~ s/ROW(\w+)/$row->{lc $1}/g;
|
|||
|
my $info2 = run_command($SQL2, { dbnumber => $dbnum } );
|
|||
|
for my $row2 (@{ $info2->{db}[0]{slurp} }) {
|
|||
|
for my $inner (keys %{ $row2 }) {
|
|||
|
$result->{$row->{$key}}{$inner} = $row2->{$inner};
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return $result;
|
|||
|
|
|||
|
} ## end of find_catalog_info
|
|||
|
|
|||
|
|
|||
|
sub check_sequence {
|
|||
|
|
|||
|
## Checks how many values are left in sequences
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Warning and critical are percentages
|
|||
|
## Can exclude and include sequences
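##
## A typical Nagios invocation might look like the following; the symlink name
## follows the usual check_postgres_<action> convention, and the database name
## and thresholds are only illustrative (the defaults are 85% and 95%):
##   check_postgres_sequence --dbname=prod --warning=80% --critical=95%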
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'percent',
|
|||
|
default_warning => '85%',
|
|||
|
default_critical => '95%',
|
|||
|
forcemrtg => 1,
|
|||
|
});
|
|||
|
|
|||
|
(my $w = $warning) =~ s/\D//;
|
|||
|
(my $c = $critical) =~ s/\D//;
|
|||
|
|
|||
|
## Gather up all sequence names
|
|||
|
## no critic
|
|||
|
my $SQL = q{
|
|||
|
SELECT DISTINCT ON (nspname, seqname) nspname, seqname,
|
|||
|
quote_ident(nspname) || '.' || quote_ident(seqname) AS safename, typname
|
|||
|
-- sequences by column dependency
|
|||
|
FROM (
|
|||
|
SELECT depnsp.nspname, dep.relname as seqname, typname
|
|||
|
FROM pg_depend
|
|||
|
JOIN pg_class on classid = pg_class.oid
|
|||
|
JOIN pg_class dep on dep.oid = objid
|
|||
|
JOIN pg_namespace depnsp on depnsp.oid= dep.relnamespace
|
|||
|
JOIN pg_class refclass on refclass.oid = refclassid
|
|||
|
JOIN pg_class ref on ref.oid = refobjid
|
|||
|
JOIN pg_namespace refnsp on refnsp.oid = ref.relnamespace
|
|||
|
JOIN pg_attribute refattr ON (refobjid, refobjsubid) = (refattr.attrelid, refattr.attnum)
|
|||
|
JOIN pg_type ON refattr.atttypid = pg_type.oid
|
|||
|
WHERE pg_class.relname = 'pg_class'
|
|||
|
AND refclass.relname = 'pg_class'
|
|||
|
AND dep.relkind in ('S')
|
|||
|
AND ref.relkind in ('r')
|
|||
|
AND typname IN ('int2', 'int4', 'int8')
|
|||
|
UNION ALL
|
|||
|
--sequences by parsing DEFAULT constraints
|
|||
|
SELECT nspname, seq.relname, typname
|
|||
|
FROM pg_attrdef
|
|||
|
JOIN pg_attribute ON (attrelid, attnum) = (adrelid, adnum)
|
|||
|
JOIN pg_type on pg_type.oid = atttypid
|
|||
|
JOIN pg_class rel ON rel.oid = attrelid
|
|||
|
JOIN pg_class seq ON seq.relname = regexp_replace(adsrc, $re$^nextval\('(.+?)'::regclass\)$$re$, $$\1$$)
|
|||
|
AND seq.relnamespace = rel.relnamespace
|
|||
|
JOIN pg_namespace nsp ON nsp.oid = seq.relnamespace
|
|||
|
WHERE adsrc ~ 'nextval' AND seq.relkind = 'S' AND typname IN ('int2', 'int4', 'int8')
|
|||
|
UNION ALL
|
|||
|
-- all sequences, to catch those whose associations are not obviously recorded in pg_catalog
|
|||
|
SELECT nspname, relname, CAST('int8' AS TEXT)
|
|||
|
FROM pg_class
|
|||
|
JOIN pg_namespace nsp ON nsp.oid = relnamespace
|
|||
|
WHERE relkind = 'S'
|
|||
|
) AS seqs
|
|||
|
ORDER BY nspname, seqname, typname
|
|||
|
};
|
|||
|
## use critic
|
|||
|
|
|||
|
my $info = run_command($SQL, {regex => qr{\w}, emptyok => 1} );
|
|||
|
|
|||
|
my $MAXINT2 = 32767;
|
|||
|
my $MAXINT4 = 2147483647;
|
|||
|
my $MAXINT8 = 9223372036854775807;
|
|||
|
|
|||
|
my $limit = 0;
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my (@crit,@warn,@ok);
|
|||
|
my $maxp = 0;
|
|||
|
my %seqinfo;
|
|||
|
my %seqperf;
|
|||
|
my $multidb = @{$info->{db}} > 1 ? "$db->{dbname}." : '';
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
my ($schema, $seq, $seqname, $typename) = @$r{qw/ nspname seqname safename typname /};
|
|||
|
next if skip_item($seq);
|
|||
|
my $maxValue = $typename eq 'int2' ? $MAXINT2 : $typename eq 'int4' ? $MAXINT4 : $MAXINT8;
|
|||
|
$SQL = qq{
|
|||
|
SELECT last_value, slots, used, ROUND(used/slots*100) AS percent,
|
|||
|
CASE WHEN slots < used THEN 0 ELSE slots - used END AS numleft
|
|||
|
FROM (
|
|||
|
SELECT last_value,
|
|||
|
CEIL((LEAST(max_value, $maxValue)-min_value::numeric+1)/increment_by::NUMERIC) AS slots,
|
|||
|
CEIL((last_value-min_value::numeric+1)/increment_by::NUMERIC) AS used
|
|||
|
FROM $seqname) foo
|
|||
|
};
|
|||
|
|
|||
|
my $seqinfo = run_command($SQL, { target => $db });
|
|||
|
my $r2 = $seqinfo->{db}[0]{slurp}[0];
|
|||
|
my ($last, $slots, $used, $percent, $left) = @$r2{qw/ last_value slots used percent numleft / };
|
|||
|
if (! defined $last) {
|
|||
|
ndie msg('seq-die', $seqname);
|
|||
|
}
|
|||
|
my $msg = msg('seq-msg', $seqname, $percent, $left);
|
|||
|
my $nicename = perfname("$multidb$seqname");
|
|||
|
$seqperf{$percent}{$seqname} = [$left, " $nicename=$percent%;$w%;$c%"];
|
|||
|
if ($percent >= $maxp) {
|
|||
|
$maxp = $percent;
|
|||
|
if (! exists $opt{perflimit} or $limit++ < $opt{perflimit}) {
|
|||
|
push @{$seqinfo{$percent}} => $MRTG ? [$seqname,$percent,$slots,$used,$left] : $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
next if $MRTG;
|
|||
|
|
|||
|
if (length $critical and $percent >= $c) {
|
|||
|
push @crit => $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $percent >= $w) {
|
|||
|
push @warn => $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
if ($MRTG) {
|
|||
|
my $msg = join ' | ' => map { $_->[0] } @{$seqinfo{$maxp}};
|
|||
|
do_mrtg({one => $maxp, msg => $msg});
|
|||
|
}
|
|||
|
$limit = 0;
|
|||
|
PERF: for my $val (sort { $b <=> $a } keys %seqperf) {
|
|||
|
for my $seq (sort { $seqperf{$val}{$a}->[0] <=> $seqperf{$val}{$b}->[0] or $a cmp $b } keys %{$seqperf{$val}}) {
|
|||
|
last PERF if exists $opt{perflimit} and $limit++ >= $opt{perflimit};
|
|||
|
$db->{perf} .= $seqperf{$val}{$seq}->[1];
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (@crit) {
|
|||
|
add_critical join ' ' => @crit;
|
|||
|
}
|
|||
|
elsif (@warn) {
|
|||
|
add_warning join ' ' => @warn;
|
|||
|
}
|
|||
|
else {
|
|||
|
if (keys %seqinfo) {
|
|||
|
add_ok join ' ' => @{$seqinfo{$maxp}};
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok msg('seq-none');
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_sequence
|
|||
|
|
|||
|
|
|||
|
sub check_settings_checksum {
|
|||
|
|
|||
|
## Verify the checksum of all settings
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Note that this will vary from user to user due to ALTER USER
|
|||
|
## and because superusers see additional settings
|
|||
|
## One of warning or critical must be given (but not both)
|
|||
|
## It should run one time to find out the expected checksum
|
|||
|
## You can use --critical="0" to find out the checksum
|
|||
|
## You can include or exclude settings as well
|
|||
|
## Example:
|
|||
|
## check_postgres_settings_checksum --critical="4e7ba68eb88915d3d1a36b2009da4acd"
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'checksum', onlyone => 1});
|
|||
|
|
|||
|
eval {
|
|||
|
require Digest::MD5;
|
|||
|
};
|
|||
|
if ($@) {
|
|||
|
ndie msg('checksum-nomd');
|
|||
|
}
|
|||
|
|
|||
|
$SQL = 'SELECT name, setting FROM pg_settings ORDER BY name';
|
|||
|
my $info = run_command($SQL, { regex => qr[client_encoding] });
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
|
|||
|
my $newstring = '';
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
next if skip_item($r->{name});
|
|||
|
$newstring .= "$r->{name} $r->{setting}\n";
|
|||
|
}
|
|||
|
if (! length $newstring) {
|
|||
|
add_unknown msg('no-match-set');
|
|||
|
}
|
|||
|
|
|||
|
my $checksum = Digest::MD5::md5_hex($newstring);
|
|||
|
|
|||
|
my $msg = msg('checksum-msg', $checksum);
|
|||
|
if ($MRTG) {
|
|||
|
$opt{mrtg} or ndie msg('checksum-nomrtg');
|
|||
|
do_mrtg({one => $opt{mrtg} eq $checksum ? 1 : 0, msg => $checksum});
|
|||
|
}
|
|||
|
if ($critical and $critical ne $checksum) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif ($warning and $warning ne $checksum) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
elsif (!$critical and !$warning) {
|
|||
|
add_unknown $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_settings_checksum
|
|||
|
|
|||
|
|
|||
|
sub check_slony_status {
|
|||
|
|
|||
|
## Checks the sl_status table
|
|||
|
## Returns unknown if sl_status is not found
|
|||
|
## Returns critical if the status is not "good"
|
|||
|
## Otherwise, returns based on time-based warning and critical options
|
|||
|
## Supports: Nagios, MRTG
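##
## Example invocation (the schema name and thresholds are illustrative only;
## warning and critical are in seconds of lag, defaulting to 60 and 300):
##   check_postgres_slony_status --dbname=prod --schema=_replication --warning=120 --critical=600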
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'time',
|
|||
|
default_warning => '60',
|
|||
|
default_critical => '300',
|
|||
|
});
|
|||
|
|
|||
|
## If given schemas on the command-line, map back to targetdbs
|
|||
|
if (defined $opt{schema}) {
|
|||
|
my $x = 0;
|
|||
|
for my $db (@targetdb) {
|
|||
|
$db->{schemalist} = $opt{schema}->[$x] || '';
|
|||
|
$x++;
|
|||
|
}
|
|||
|
}
|
|||
|
else {
|
|||
|
## Otherwise, find all slony schemas and put them in ourselves
|
|||
|
$SQL = q{SELECT quote_ident(nspname) AS nspname FROM pg_namespace WHERE oid IN }.
|
|||
|
q{(SELECT relnamespace FROM pg_class WHERE relkind = 'v' AND relname = 'sl_status')};
|
|||
|
my $info = run_command($SQL);
|
|||
|
for my $db (@{ $info->{db} }) {
|
|||
|
$db->{schemalist} = join ',' => map { $_->{nspname} } @{ $db->{slurp} };
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
my $SLSQL =
|
|||
|
q{SELECT
|
|||
|
ROUND(EXTRACT(epoch FROM st_lag_time)) AS lagtime,
|
|||
|
st_origin,
|
|||
|
st_received,
|
|||
|
current_database() AS cd,
|
|||
|
COALESCE(n1.no_comment, '') AS com1,
|
|||
|
COALESCE(n2.no_comment, '') AS com2
|
|||
|
FROM SCHEMA.sl_status
|
|||
|
JOIN SCHEMA.sl_node n1 ON (n1.no_id=st_origin)
|
|||
|
JOIN SCHEMA.sl_node n2 ON (n2.no_id=st_received)};
|
|||
|
|
|||
|
my $maxlagtime = -1;
|
|||
|
|
|||
|
my $x = 1;
|
|||
|
for $db (@targetdb) {
|
|||
|
next if ! $db->{schemalist};
|
|||
|
$db->{perf} = '';
|
|||
|
my @perf;
|
|||
|
for my $schema (split /,/ => $db->{schemalist}) {
|
|||
|
## Set for output
|
|||
|
$db->{showschema} = $schema;
|
|||
|
|
|||
|
(my $SQL = $SLSQL) =~ s/SCHEMA/$schema/g;
|
|||
|
my $info = run_command($SQL, { dbnumber => $x });
|
|||
|
my $slurp = $info->{db}[0]{slurp}[0];
|
|||
|
if (! defined $slurp->{lagtime}) {
|
|||
|
add_unknown msg('slony-nonumber');
|
|||
|
return;
|
|||
|
}
|
|||
|
my ($lag,$from,$to,$dbname,$fromc,$toc) = @$slurp{qw/ lagtime st_origin st_received cd com1 com2/};
|
|||
|
$maxlagtime = $lag if $lag > $maxlagtime;
|
|||
|
push @perf => [
|
|||
|
$lag,
|
|||
|
$from,
|
|||
|
qq{'$dbname.$schema Node $from($fromc) -> Node $to($toc)'=$lag;$warning;$critical},
|
|||
|
];
|
|||
|
|
|||
|
} ## end each schema in this database
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $maxlagtime});
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
$db->{perf} .= join "\n" => map { $_->[2] } sort { $b->[0]<=>$a->[0] or $a->[1] cmp $b->[1] } @perf;
|
|||
|
|
|||
|
my $msg = msg('slony-lagtime', $maxlagtime);
|
|||
|
$msg .= sprintf ' (%s)', pretty_time($maxlagtime, $maxlagtime > 500 ? 'S' : '');
|
|||
|
if (length $critical and $maxlagtime >= $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $maxlagtime >= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
|
|||
|
$x++;
|
|||
|
}
|
|||
|
|
|||
|
if ($maxlagtime < 1) { ## No schemas found
|
|||
|
add_unknown msg('slony-noschema');
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_slony_status
|
|||
|
|
|||
|
|
|||
|
sub check_timesync {
|
|||
|
|
|||
|
## Compare local time to the database time
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Warning and critical are given in number of seconds difference
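##
## Example invocation (the host name is illustrative; the defaults are 2 and 5 seconds):
##   check_postgres_timesync --host=dbhost --warning=3 --critical=6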
|
|||
|
|
|||
|
my ($warning,$critical) = validate_range
|
|||
|
({
|
|||
|
type => 'seconds',
|
|||
|
default_warning => 2,
|
|||
|
default_critical => 5,
|
|||
|
});
|
|||
|
|
|||
|
$SQL = q{SELECT round(extract(epoch FROM now())) AS epok, TO_CHAR(now(),'YYYY-MM-DD HH24:MI:SS') AS pretti};
|
|||
|
my $info = run_command($SQL);
|
|||
|
my $localepoch = time;
|
|||
|
my @l = localtime;
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my ($pgepoch,$pgpretty) = @{$db->{slurp}->[0]}{qw/ epok pretti /};
|
|||
|
|
|||
|
my $diff = abs($pgepoch - $localepoch);
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $diff, msg => "DB: $db->{dbname}"});
|
|||
|
}
|
|||
|
$db->{perf} = sprintf '%s=%ss;%s;%s',
|
|||
|
perfname(msg('timesync-diff')), $diff, $warning, $critical;
|
|||
|
|
|||
|
my $localpretty = sprintf '%d-%02d-%02d %02d:%02d:%02d', $l[5]+1900, $l[4]+1, $l[3],$l[2],$l[1],$l[0];
|
|||
|
my $msg = msg('timesync-msg', $diff, $pgpretty, $localpretty);
|
|||
|
|
|||
|
if (length $critical and $diff >= $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $diff >= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_timesync
|
|||
|
|
|||
|
|
|||
|
sub check_txn_idle {
|
|||
|
|
|||
|
## Check the duration and optionally number of "idle in transaction" processes
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## It makes no sense to run this more than once on the same cluster
|
|||
|
## Warning and critical are time limits or counts for time limits - default to seconds
|
|||
|
## Valid time units: s[econd], m[inute], h[our], d[ay]
|
|||
|
## All above may be written as plural as well (e.g. "2 hours")
|
|||
|
## Valid counts for time limits: "$int for $time"
|
|||
|
## Can also ignore databases with exclude and limit with include
|
|||
|
## Limit to a specific user with the includeuser option
|
|||
|
## Exclude users with the excludeuser option
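##
## Example invocations (names and thresholds are illustrative only):
##   check_postgres_txn_idle --dbname=prod --warning='15 minutes'
##   check_postgres_txn_idle --dbname=prod --critical='5 for 10 minutes'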
|
|||
|
|
|||
|
my $type = shift || 'txnidle';
|
|||
|
my $thing = shift || msg('transactions');
|
|||
|
my $perf = shift || msg('txn-time');
|
|||
|
my $start = shift || 'query_start';
|
|||
|
my $clause = shift || q{current_query ~ '^<'};
|
|||
|
|
|||
|
## Extract the warning and critical seconds and counts.
|
|||
|
## If not given, items will be an empty string
|
|||
|
my ($wcount, $wtime, $ccount, $ctime) = validate_integer_for_time();
|
|||
|
|
|||
|
## We don't GROUP BY because we want details on every connection
|
|||
|
## Someday we may even break things down by database
|
|||
|
if ($type ne 'qtime') {
|
|||
|
$SQL = q{SELECT datname, datid, procpid, usename, client_addr, xact_start, current_query, }.
|
|||
|
q{CASE WHEN client_port < 0 THEN 0 ELSE client_port END AS client_port, }.
|
|||
|
qq{COALESCE(ROUND(EXTRACT(epoch FROM now()-$start)),0) AS seconds }.
|
|||
|
qq{FROM pg_stat_activity WHERE $clause$USERWHERECLAUSE }.
|
|||
|
q{ORDER BY xact_start, query_start, procpid DESC};
|
|||
|
}
|
|||
|
else {
|
|||
|
$SQL = q{SELECT datname, datid, procpid, usename, client_addr, current_query, }.
|
|||
|
q{CASE WHEN client_port < 0 THEN 0 ELSE client_port END AS client_port, }.
|
|||
|
qq{COALESCE(ROUND(EXTRACT(epoch FROM now()-$start)),0) AS seconds }.
|
|||
|
qq{FROM pg_stat_activity WHERE $clause$USERWHERECLAUSE }.
|
|||
|
q{ORDER BY query_start, procpid DESC};
|
|||
|
}
|
|||
|
|
|||
|
my $info = run_command($SQL, { emptyok => 1 } );
|
|||
|
|
|||
|
|
|||
|
## Extract the first entry
|
|||
|
$db = $info->{db}[0];
|
|||
|
|
|||
|
## Store the current longest row
|
|||
|
my $maxr = { seconds => 0 };
|
|||
|
|
|||
|
## How many valid rows did we get?
|
|||
|
my $count = 0;
|
|||
|
|
|||
|
## Info about the top offender
|
|||
|
my $whodunit = '';
|
|||
|
if ($MRTG) {
|
|||
|
if (defined $db->{dbname}) {
|
|||
|
$whodunit = "DB: $db->{dbname}";
|
|||
|
} else {
|
|||
|
$whodunit = sprintf q{DB: %s}, msg('no-db');
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
## Process each returned row
|
|||
|
for my $r (@{ $db->{slurp} }) {
|
|||
|
|
|||
|
## Skip if we don't care about this database
|
|||
|
next if skip_item($r->{datname});
|
|||
|
|
|||
|
## We do a lot of filtering based on the current_query
|
|||
|
my $cq = $r->{current_query};
|
|||
|
|
|||
|
## Return unknown if we cannot see because we are a non-superuser?
|
|||
|
if ($cq =~ /insufficient/o) {
|
|||
|
add_unknown msg('psa-nosuper');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## Return unknown if stats_command_string / track_activities is off?
|
|||
|
if ($cq =~ /disabled/o) {
|
|||
|
add_unknown msg('psa-disabled');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## Detect other cases where pg_stat_activity is not fully populated
|
|||
|
if ($type ne 'qtime' and length $r->{xact_start} and $r->{xact_start} !~ /\d/o) {
|
|||
|
add_unknown msg('psa-noexact');
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## Filter out based on the action
|
|||
|
next if $action eq 'txn_idle' and $cq ne '<IDLE> in transaction';
|
|||
|
|
|||
|
## Keep track of the longest overall time
|
|||
|
$maxr = $r if $r->{seconds} >= $maxr->{seconds};
|
|||
|
|
|||
|
$count++;
|
|||
|
}
|
|||
|
|
|||
|
## If there were no matches, then there were no rows, or no non-excluded rows
|
|||
|
## We don't care which at the moment, and return the same message
|
|||
|
if (! $count) {
|
|||
|
$MRTG and do_mrtg({one => 0, msg => $whodunit});
|
|||
|
$db->{perf} = "$perf=0;$wtime;$ctime";
|
|||
|
|
|||
|
add_ok msg("$type-none");
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## Extract the seconds to avoid typing out the hash each time
|
|||
|
my $max = $maxr->{seconds};
|
|||
|
|
|||
|
## See if we have a minimum number of matches
|
|||
|
my $base_count = $wcount || $ccount;
|
|||
|
if ($base_count and $count < $base_count) {
|
|||
|
$db->{perf} = "$perf=$count;$wcount;$ccount";
|
|||
|
add_ok msg("$type-count-none", $base_count);
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
## Details on who the top offender was
|
|||
|
if ($max > 0) {
|
|||
|
$whodunit = sprintf q{%s:%s %s:%s %s:%s%s%s},
|
|||
|
msg('PID'), $maxr->{procpid},
|
|||
|
msg('database'), $maxr->{datname},
|
|||
|
msg('username'), $maxr->{usename},
|
|||
|
$maxr->{client_addr} eq '' ? '' : (sprintf ' %s:%s', msg('address'), $maxr->{client_addr}),
|
|||
|
($maxr->{client_port} eq '' or $maxr->{client_port} < 1)
|
|||
|
? '' : (sprintf ' %s:%s', msg('port'), $maxr->{client_port});
|
|||
|
}
|
|||
|
|
|||
|
## For MRTG, we can simply exit right now
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $max, msg => $whodunit});
|
|||
|
exit;
|
|||
|
}
|
|||
|
|
|||
|
## If the number of seconds is high, show an alternate form
|
|||
|
my $ptime = $max > 300 ? ' (' . pretty_time($max) . ')' : '';
|
|||
|
|
|||
|
## Show the maximum number of seconds in the perf section
|
|||
|
$db->{perf} .= sprintf q{%s=%ss;%s;%s},
|
|||
|
$perf,
|
|||
|
$max,
|
|||
|
$wtime,
|
|||
|
$ctime;
|
|||
|
|
|||
|
if (length $ctime and length $ccount) {
|
|||
|
if ($max >= $ctime and $count >= $ccount) {
|
|||
|
add_critical msg("$type-for-msg", $count, $ctime, $max, $ptime, $whodunit);
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
elsif (length $ctime) {
|
|||
|
if ($max >= $ctime) {
|
|||
|
add_critical msg("$type-msg", $max, $ptime, $whodunit);
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
elsif (length $ccount) {
|
|||
|
if ($count >= $ccount) {
|
|||
|
add_critical msg("$type-count-msg", $count);
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (length $wtime and length $wcount) {
|
|||
|
if ($max >= $wtime and $count >= $wcount) {
|
|||
|
add_warning msg("$type-for-msg", $count, $wtime, $max, $ptime, $whodunit);
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
elsif (length $wtime) {
|
|||
|
if ($max >= $wtime) {
|
|||
|
add_warning msg("$type-msg", $max, $ptime, $whodunit);
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
elsif (length $wcount) {
|
|||
|
if ($count >= $wcount) {
|
|||
|
add_warning msg("$type-count-msg", $count);
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
add_ok msg("$type-msg", $max, $ptime, $whodunit);
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_txn_idle
|
|||
|
|
|||
|
|
|||
|
sub check_txn_time {
|
|||
|
|
|||
|
## This is the same as check_txn_idle, but we want where the
|
|||
|
## transaction start time is not null
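##
## Example invocation (values are illustrative; the same warning/critical
## formats as check_txn_idle apply):
##   check_postgres_txn_time --port=5432 --warning='10 minutes'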
|
|||
|
|
|||
|
check_txn_idle('txntime',
|
|||
|
'',
|
|||
|
'',
|
|||
|
'xact_start',
|
|||
|
q{xact_start IS NOT NULL});
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_txn_time
|
|||
|
|
|||
|
|
|||
|
sub check_txn_wraparound {
|
|||
|
|
|||
|
## Check how close to transaction wraparound we are on all databases
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Warning and critical are the number of transactions performed
|
|||
|
## Thus, anything *over* that number will trip the alert
|
|||
|
## See: http://www.postgresql.org/docs/current/static/routine-vacuuming.html#VACUUM-FOR-WRAPAROUND
|
|||
|
## It makes no sense to run this more than once on the same cluster
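##
## Example invocation (the host name is illustrative; the thresholds shown are
## the defaults, and both must stay under 2 billion):
##   check_postgres_txn_wraparound --host=dbhost --warning=1300000000 --critical=1400000000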
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range
|
|||
|
({
|
|||
|
type => 'positive integer',
|
|||
|
default_warning => 1_300_000_000,
|
|||
|
default_critical => 1_400_000_000,
|
|||
|
});
|
|||
|
|
|||
|
if ($warning and $warning >= 2_000_000_000) {
|
|||
|
ndie msg('txnwrap-wbig');
|
|||
|
}
|
|||
|
if ($critical and $critical >= 2_000_000_000) {
|
|||
|
ndie msg('txnwrap-cbig');
|
|||
|
}
|
|||
|
|
|||
|
$SQL = q{SELECT datname, age(datfrozenxid) AS age FROM pg_database WHERE datallowconn ORDER BY 1, 2};
|
|||
|
my $info = run_command($SQL, { regex => qr[\w+\s+\|\s+\d+] } );
|
|||
|
|
|||
|
my ($mrtgmax,$mrtgmsg) = (0,'?');
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my ($max,$msg) = (0,'?');
|
|||
|
for my $r (@{$db->{slurp}}) {
|
|||
|
my ($dbname,$dbtxns) = ($r->{datname},$r->{age});
|
|||
|
$db->{perf} .= sprintf ' %s=%s;%s;%s;%s;%s',
|
|||
|
perfname($dbname), $dbtxns, $warning, $critical, 0, 2000000000;
|
|||
|
next if skip_item($dbname);
|
|||
|
if ($dbtxns > $max) {
|
|||
|
$max = $dbtxns;
|
|||
|
$msg = qq{$dbname: $dbtxns};
|
|||
|
if ($dbtxns > $mrtgmax) {
|
|||
|
$mrtgmax = $dbtxns;
|
|||
|
$mrtgmsg = "DB: $dbname";
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
if (length $critical and $max >= $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $max >= $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
$MRTG and do_mrtg({one => $mrtgmax, msg => $mrtgmsg});
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_txn_wraparound
|
|||
|
|
|||
|
|
|||
|
sub check_version {
|
|||
|
|
|||
|
## Compare version with what we think it should be
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Warning and critical are the major and minor (e.g. 8.3)
|
|||
|
## or the major, minor, and revision (e.g. 8.2.4 or even 8.3beta4)
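##
## Example invocations (host name and version numbers are illustrative only):
##   check_postgres_version --host=dbhost --critical=9.1
##   check_postgres_version --host=dbhost --warning=8.4.8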
|
|||
|
|
|||
|
if ($MRTG) {
|
|||
|
if (!exists $opt{mrtg} or $opt{mrtg} !~ /^\d+\.\d+/) {
|
|||
|
ndie msg('version-badmrtg');
|
|||
|
}
|
|||
|
if ($opt{mrtg} =~ /^\d+\.\d+$/) {
|
|||
|
$opt{critical} = $opt{mrtg};
|
|||
|
}
|
|||
|
else {
|
|||
|
$opt{warning} = $opt{mrtg};
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range({type => 'version', forcemrtg => 1});
|
|||
|
|
|||
|
my ($warnfull, $critfull) = (($warning =~ /^\d+\.\d+$/ ? 0 : 1),($critical =~ /^\d+\.\d+$/ ? 0 : 1));
|
|||
|
|
|||
|
my $info = run_command('SELECT version() AS version');
|
|||
|
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my $row = $db->{slurp}[0];
|
|||
|
if ($row->{version} !~ /((\d+\.\d+)(\w+|\.\d+))/o) {
|
|||
|
add_unknown msg('invalid-query', $row->{version});
|
|||
|
next;
|
|||
|
}
|
|||
|
my ($full,$version,$revision) = ($1,$2,$3||'?');
|
|||
|
$revision =~ s/^\.//;
|
|||
|
|
|||
|
my $ok = 1;
|
|||
|
|
|||
|
if (length $critical) {
|
|||
|
if (($critfull and $critical ne $full)
|
|||
|
or (!$critfull and $critical ne $version)) {
|
|||
|
$MRTG and do_mrtg({one => 0, msg => $full});
|
|||
|
add_critical msg('version-fail', $full, $critical);
|
|||
|
$ok = 0;
|
|||
|
}
|
|||
|
}
|
|||
|
elsif (length $warning) {
|
|||
|
if (($warnfull and $warning ne $full)
|
|||
|
or (!$warnfull and $warning ne $version)) {
|
|||
|
$MRTG and do_mrtg({one => 0, msg => $full});
|
|||
|
add_warning msg('version-fail', $full, $warning);
|
|||
|
$ok = 0;
|
|||
|
}
|
|||
|
}
|
|||
|
if ($ok) {
|
|||
|
$MRTG and do_mrtg({one => 1, msg => $full});
|
|||
|
add_ok msg('version-ok', $full);
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_version
|
|||
|
|
|||
|
|
|||
|
sub check_wal_files {
|
|||
|
|
|||
|
## Check on the number of WAL, or WAL "ready", files in use
|
|||
|
## Supports: Nagios, MRTG
|
|||
|
## Must run as a superuser
|
|||
|
## Critical and warning are the number of files
|
|||
|
## Example: --critical=40
|
|||
|
|
|||
|
my $subdir = shift || '';
|
|||
|
my $extrabit = shift || '';
|
|||
|
|
|||
|
my $default_warning = shift || 10;
|
|||
|
my $default_critical = shift || 15;
|
|||
|
|
|||
|
my $arg = {type => 'positive integer', leastone => 1};
|
|||
|
if ($default_warning) {
|
|||
|
$arg->{default_warning} = $default_warning;
|
|||
|
}
|
|||
|
if ($default_critical) {
|
|||
|
$arg->{default_critical} = $default_critical;
|
|||
|
}
|
|||
|
|
|||
|
my ($warning, $critical) = validate_range($arg);
|
|||
|
|
|||
|
## Figure out where the pg_xlog directory is
|
|||
|
$SQL = qq{SELECT count(*) AS count FROM pg_ls_dir('pg_xlog$subdir') WHERE pg_ls_dir ~ E'^[0-9A-F]{24}$extrabit\$'}; ## no critic (RequireInterpolationOfMetachars)
|
|||
|
|
|||
|
my $info = run_command($SQL, {regex => qr[\d] });
|
|||
|
|
|||
|
my $found = 0;
|
|||
|
for $db (@{$info->{db}}) {
|
|||
|
my $r = $db->{slurp}[0];
|
|||
|
my $numfiles = $r->{count};
|
|||
|
if ($MRTG) {
|
|||
|
do_mrtg({one => $numfiles});
|
|||
|
}
|
|||
|
my $msg = $extrabit ? msg('wal-numfound2', $numfiles, $extrabit)
|
|||
|
: msg('wal-numfound', $numfiles);
|
|||
|
$db->{perf} .= sprintf '%s=%s;%s;%s',
|
|||
|
perfname(msg('files')), $numfiles, $warning, $critical;
|
|||
|
if (length $critical and $numfiles > $critical) {
|
|||
|
add_critical $msg;
|
|||
|
}
|
|||
|
elsif (length $warning and $numfiles > $warning) {
|
|||
|
add_warning $msg;
|
|||
|
}
|
|||
|
else {
|
|||
|
add_ok $msg;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return;
|
|||
|
|
|||
|
} ## end of check_wal_files
|
|||
|
|
|||
|
|
|||
|
|
|||
|
=pod
|
|||
|
|
|||
|
=head1 NAME
|
|||
|
|
|||
|
B<check_postgres.pl> - a Postgres monitoring script for Nagios, MRTG, Cacti, and others
|
|||
|
|
|||
|
This document describes check_postgres.pl version 2.19.0
|
|||
|
|
|||
|
=head1 SYNOPSIS
|
|||
|
|
|||
|
## Create all symlinks
|
|||
|
check_postgres.pl --symlinks
|
|||
|
|
|||
|
## Check connection to Postgres database 'pluto':
|
|||
|
check_postgres.pl --action=connection --db=pluto
|
|||
|
|
|||
|
## Same thing, but using the symlink
|
|||
|
check_postgres_connection --db=pluto
|
|||
|
|
|||
|
## Warn if > 100 locks, critical if > 200, or > 20 exclusive
|
|||
|
check_postgres_locks --warning=100 --critical="total=200;exclusive=20"
|
|||
|
|
|||
|
## Show the current number of idle connections on port 6543:
|
|||
|
check_postgres_txn_idle --port=6543 --output=simple
|
|||
|
|
|||
|
## There are many other actions and options, please keep reading.
|
|||
|
|
|||
|
The latest news and documentation can always be found at:
|
|||
|
http://bucardo.org/check_postgres/
|
|||
|
|
|||
|
=head1 DESCRIPTION
|
|||
|
|
|||
|
check_postgres.pl is a Perl script that runs many different tests against
|
|||
|
one or more Postgres databases. It uses the psql program to gather the
|
|||
|
information, and outputs the results in one of three formats: Nagios, MRTG,
|
|||
|
or simple.
|
|||
|
|
|||
|
=head2 Output Modes
|
|||
|
|
|||
|
The output can be changed by use of the C<--output> option. The default output
|
|||
|
is nagios, although this can be changed at the top of the script if you wish. The
|
|||
|
current option choices are B<nagios>, B<mrtg>, and B<simple>. To avoid having to
|
|||
|
enter the output argument each time, the type of output is automatically set
|
|||
|
if no --output argument is given, and if the current directory has one of the
|
|||
|
output options in its name. For example, creating a directory named mrtg and
|
|||
|
populating it with symlinks via the I<--symlinks> argument would ensure that
|
|||
|
any actions run from that directory will always default to an output of "mrtg".
|
|||
|
As a shortcut for --output=simple, you can enter --simple, which also overrides
|
|||
|
the directory naming trick.
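
For example, the first command below (the database name is just an illustration)
forces MRTG output for the connection check regardless of the current directory
name, while the second uses the --simple shortcut described above:

  check_postgres_connection --db=pluto --output=mrtg
  check_postgres_connection --db=pluto --simple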
|
|||
|
|
|||
|
|
|||
|
=head3 Nagios output
|
|||
|
|
|||
|
The default output format is for Nagios, which is a single line of information, along
|
|||
|
with four specific exit codes:
|
|||
|
|
|||
|
=over 2
|
|||
|
|
|||
|
=item 0 (OK)
|
|||
|
|
|||
|
=item 1 (WARNING)
|
|||
|
|
|||
|
=item 2 (CRITICAL)
|
|||
|
|
|||
|
=item 3 (UNKNOWN)
|
|||
|
|
|||
|
=back
|
|||
|
|
|||
|
The output line is one of the words above, a colon, and then a short description of what
|
|||
|
was measured. Additional statistics information, as well as the total time the command
|
|||
|
took, can be output as well: see the documentation on the arguments
|
|||
|
I<L<--showperf|/--showperf=VAL>>,
|
|||
|
I<L<--perflimit|/--perflimit=i>>, and
|
|||
|
I<L<--showtime|/--showtime=VAL>>.
|
|||
|
|
|||
|
=head3 MRTG output
|
|||
|
|
|||
|
The MRTG output is four lines, with the first line always giving a single number of importance.
|
|||
|
When possible, this number represents an actual value such as a number of bytes, but it
|
|||
|
may also be a 1 or a 0 for actions that only return "true" or "false", such as check_postgres_version.
|
|||
|
The second line is an additional stat and is only used for some actions. The third line indicates
|
|||
|
an "uptime" and is not used. The fourth line is a description and usually indicates the name of
|
|||
|
the database the stat from the first line was pulled from, but may be different depending on the
|
|||
|
action.
|
|||
|
|
|||
|
Some actions accept an optional I<--mrtg> argument to further control the output.
|
|||
|
|
|||
|
See the documentation on each action for details on the exact MRTG output for each one.
|
|||
|
|
|||
|
=head3 Simple output
|
|||
|
|
|||
|
The simple output is a truncated version of the MRTG one: it returns the first number
and nothing else. This is very useful when you just want to check the state of something, regardless
|
|||
|
of any threshold. You can transform the numeric output by appending KB, MB, GB, TB, or EB to the output
|
|||
|
argument, for example:
|
|||
|
|
|||
|
--output=simple,MB
|
|||
|
|
|||
|
=head3 Cacti output
|
|||
|
|
|||
|
The Cacti output consists of one or more items on the same line, with a simple name, a colon, and
|
|||
|
then a number. At the moment, the only action with explicit Cacti output is 'dbstats', and using
|
|||
|
the --output option is not needed in this case, as Cacti is the only output for this action. For many
|
|||
|
other actions, using --simple is enough to make Cacti happy.
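
For example (the connection values here are only placeholders):

  check_postgres_dbstats --dbhost=prod1 --dbname=products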
|
|||
|
|
|||
|
=head1 DATABASE CONNECTION OPTIONS
|
|||
|
|
|||
|
All actions accept a common set of database options.
|
|||
|
|
|||
|
=over 4
|
|||
|
|
|||
|
=item B<-H NAME> or B<--host=NAME>
|
|||
|
|
|||
|
Connect to the host indicated by NAME. Can be a comma-separated list of names. Multiple host arguments
|
|||
|
are allowed. If no host is given, defaults to the C<PGHOST> environment variable or no host at all
|
|||
|
(which indicates using a local Unix socket). You may also use "--dbhost".
|
|||
|
|
|||
|
=item B<-p PORT> or B<--port=PORT>
|
|||
|
|
|||
|
Connects using the specified PORT number. Can be a comma-separated list of port numbers, and multiple
|
|||
|
port arguments are allowed. If no port number is given, defaults to the C<PGPORT> environment variable. If
|
|||
|
that is not set, it defaults to 5432. You may also use "--dbport".
|
|||
|
|
|||
|
=item B<-db NAME> or B<--dbname=NAME>
|
|||
|
|
|||
|
Specifies which database to connect to. Can be a comma-separated list of names, and multiple dbname
|
|||
|
arguments are allowed. If no dbname option is provided, defaults to the C<PGDATABASE> environment variable.
|
|||
|
If that is not set, it defaults to 'postgres' if psql is version 8 or greater, and 'template1' otherwise.
|
|||
|
|
|||
|
=item B<-u USERNAME> or B<--dbuser=USERNAME>
|
|||
|
|
|||
|
The name of the database user to connect as. Can be a comma-separated list of usernames, and multiple
|
|||
|
dbuser arguments are allowed. If this is not provided, it defaults to the C<PGUSER> environment variable, otherwise
|
|||
|
it defaults to 'postgres'.
|
|||
|
|
|||
|
=item B<--dbpass=PASSWORD>
|
|||
|
|
|||
|
Provides the password to connect to the database with. Use of this option is highly discouraged.
|
|||
|
Instead, one should use a .pgpass or pg_service.conf file.
|
|||
|
|
|||
|
=item B<--dbservice=NAME>
|
|||
|
|
|||
|
The name of a service inside of the pg_service.conf file. This file is in your home directory by
|
|||
|
default and contains a simple list of connection options. You can also pass additional information
|
|||
|
when using this option such as --dbservice="maindatabase sslmode=require"
|
|||
|
|
|||
|
=back
|
|||
|
|
|||
|
The database connection options can be grouped: I<--host=a,b --host=c --port=1234 --port=3344>
|
|||
|
would connect to a-1234, b-1234, and c-3344. Note that once set, an option
carries over until it is changed again.

Examples:

  --host=a,b --port=5433 --db=c
  Connects twice to port 5433, using database c, to hosts a and b: a-5433-c b-5433-c

  --host=a,b --port=5433 --db=c,d
  Connects four times: a-5433-c a-5433-d b-5433-c b-5433-d

  --host=a,b --host=foo --port=1234 --port=5433 --db=e,f
  Connects six times: a-1234-e a-1234-f b-1234-e b-1234-f foo-5433-e foo-5433-f

  --host=a,b --host=x --port=5432,5433 --dbuser=alice --dbuser=bob -db=baz
  Connects three times: a-5432-alice-baz b-5433-alice-baz x-5433-bob-baz

  --dbservice="foo" --port=5433
  Connects using the named service 'foo' in the pg_service.conf file, but overrides the port

=head1 OTHER OPTIONS

Other options include:

=over 4

=item B<--action=NAME>

States what action we are running. Required unless using a symlinked file,
in which case the name of the file is used to figure out the action.

=item B<--warning=VAL or -w VAL>

Sets the threshold at which a warning alert is fired. The valid options for this
option depend on the action used.

=item B<--critical=VAL or -c VAL>

Sets the threshold at which a critical alert is fired. The valid options for this
option depend on the action used.

=item B<-t VAL> or B<--timeout=VAL>

Sets the timeout in seconds after which the script will abort whatever it is doing
and return an UNKNOWN status. The timeout is per Postgres cluster, not for the entire
script. The default value is 10; the units are always in seconds.
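
As an illustration only (the action, host name, and thresholds below are placeholders), a slow
remote cluster could be given more time per cluster like this:

  check_postgres_backends --host=slowhost --warning=120 --critical=150 --timeout=30
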
=item B<--assume-standby-mode>

If specified, a check is first made to see whether the server is in standby mode
(--datadir is required). If it is, all checks that require SQL queries are
skipped and "Server in standby mode" is returned with an OK status instead.

Example:

  postgres@db$./check_postgres.pl --action=version --warning=8.1 --datadir /var/lib/postgresql/8.3/main/ --assume-standby-mode
  POSTGRES_VERSION OK: Server in standby mode | time=0.00

=item B<--assume-prod>

If specified, a check that the server is running in production mode is performed
(--datadir is required). The option is only relevant for
(C<symlink: check_postgres_checkpoint>).

Example:

  postgres@db$./check_postgres.pl --action=checkpoint --datadir /var/lib/postgresql/8.3/main/ --assume-prod
  POSTGRES_CHECKPOINT OK: Last checkpoint was 72 seconds ago | age=72;;300 mode=MASTER

=item B<-h> or B<--help>

Displays a help screen with a summary of all actions and options.

=item B<--man>

Displays the entire manual.

=item B<-V> or B<--version>

Shows the current version.

=item B<-v> or B<--verbose>

Sets the verbosity level. It can be given more than once to boost the level. Setting it to three
or higher (in other words, issuing C<-v -v -v>) turns on debugging information for this
program, which is sent to stderr.

=item B<--showperf=VAL>

Determines if we output additional performance data in standard Nagios format
(at the end of the string, after a pipe symbol, using name=value).
VAL should be 0 or 1. The default is 1. Only takes effect if using Nagios output mode.

=item B<--perflimit=i>

Sets a limit as to how many items of interest are reported back when using the
I<showperf> option. This only has an effect for actions that return a large
number of items, such as B<table_size>. The default is 0, or no limit. Be
careful when using this with the I<--include> or I<--exclude> options, as
those restrictions are done I<after> the query has been run, and thus your
limit may not include the items you want. Only takes effect if using Nagios output mode.
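
As an illustration only (the thresholds below are placeholders), this would report just the five
largest tables in the performance data:

  check_postgres_table_size --port=5432 --warning='2 GB' --critical='3 GB' --showperf=1 --perflimit=5
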
=item B<--showtime=VAL>

Determines if the time taken to run each query is shown in the output. VAL
should be 0 or 1. The default is 1. No effect unless I<showperf> is on.
Only takes effect if using Nagios output mode.

=item B<--test>

Enables test mode. See the L</"TEST MODE"> section below.

=item B<--PSQL=PATH>

Tells the script where to find the psql program. Useful if you have more than
one version of the psql executable on your system, or if there is no psql program
in your path. Note that this option is in all uppercase. By default, this option
is I<not allowed>. To enable it, you must change the C<$NO_PSQL_OPTION> near the
top of the script to 0. Avoid using this option if you can, and instead hard-code
your psql location into the C<$PSQL> variable, also near the top of the script.

=item B<--symlinks>

Creates symlinks to the main program for each action.

=item B<--output=VAL>

Determines the format of the output, for use in various programs. The
default is 'nagios'. Available options are 'nagios', 'mrtg', 'simple'
and 'cacti'.
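
As an illustration only (the action and port are placeholders), asking for 'simple' output should
print just a bare value, which is convenient for feeding into other scripts:

  check_postgres_backends --port=5432 --output=simple
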
=item B<--mrtg=VAL>

Used only for the MRTG or simple output, for a few specific actions.

=item B<--debugoutput=VAL>

Outputs the exact string returned by psql, for use in debugging. The value is one or more letters,
which determine if the output is displayed or not, where 'a' = all, 'c' = critical, 'w' = warning,
'o' = ok, and 'u' = unknown. Letters can be combined.
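
As an illustration only (the action and thresholds are placeholders), this would dump the raw psql
output whenever the check ends in a warning or critical state:

  check_postgres_locks --port=5432 --warning=100 --critical=200 --debugoutput=wc
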
=item B<--get_method=VAL>

Allows specification of the method used to fetch information for the C<new_version_cp>,
C<new_version_pg>, C<new_version_bc>, C<new_version_box>, and C<new_version_tnm> checks.
The following programs are tried, in order, to grab the information from the web:
GET, wget, fetch, curl, lynx, links. To force the use of just one (and thus remove the
overhead of trying all the others until one of those works), enter one of the names as
the argument to get_method. For example, a BSD box might enter the following line in
their C<.check_postgresrc> file:

  get_method=fetch

=item B<--language=VAL>

Set the language to use for all output messages. Normally, this is detected by examining
the environment variables LC_ALL, LC_MESSAGES, and LANG, but setting this option
will override any such detection.

=back


=head1 ACTIONS

The script runs one or more actions. This can either be done with the --action
flag, or by using a symlink to the main file that contains the name of the action
inside of it. For example, to run the action "timesync", you may either issue:

  check_postgres.pl --action=timesync

or use a program named:

  check_postgres_timesync

All the symlinks are created for you in the current directory
if you use the --symlinks option:

  perl check_postgres.pl --symlinks

If the file name already exists, it will not be overwritten. If the file exists
and is a symlink, you can force it to be overwritten by using "--action=build_symlinks_force".

Most actions take a I<--warning> and a I<--critical> option, indicating at what
point we change from OK to WARNING, and at what point we go to CRITICAL. Note that
because criticals are always checked first, setting the warning equal to the
critical is an effective way to turn warnings off and always give a critical.

The currently supported actions are:

=head2 B<archive_ready>

(C<symlink: check_postgres_archive_ready>) Checks how many WAL files with extension F<.ready>
exist in the F<pg_xlog/archive_status> directory, which is found
off of your B<data_directory>. This action must be run as a superuser, in order to access the
contents of the F<pg_xlog/archive_status> directory. The minimum version to use this action is
Postgres 8.1. The I<--warning> and I<--critical> options are simply the number of
F<.ready> files in the F<pg_xlog/archive_status> directory.
Usually, these values should be low: once the archiving mechanism is turned on, we usually
want it to archive WAL files as fast as possible.

If the archive command fails, the number of WAL files in your F<pg_xlog> directory will grow until
all the disk space is exhausted, forcing PostgreSQL to stop immediately.

Example 1: Check that the number of ready WAL files is 10 or less on host "pluto"

  check_postgres_archive_ready --host=pluto --critical=10

For MRTG output, reports the number of ready WAL files on line 1.

=head2 B<autovac_freeze>

(C<symlink: check_postgres_autovac_freeze>) Checks how close each database is to the Postgres B<autovacuum_freeze_max_age> setting. This
action will only work for databases version 8.2 or higher. The I<--warning> and
I<--critical> options should be expressed as percentages. The 'age' of the transactions
in each database is compared to the autovacuum_freeze_max_age setting (200 million by default)
to generate a rounded percentage. The default values are B<90%> for the warning and B<95%> for
the critical. Databases can be filtered by use of the I<--include> and I<--exclude> options.
See the L</"BASIC FILTERING"> section for more details.

Example 1: Give a warning when any databases on port 5432 are above 97%

  check_postgres_autovac_freeze --port=5432 --warning="97%"

For MRTG output, the highest overall percentage is reported on the first line, and the highest age is
reported on the second line. All databases which have the percentage from the first line are reported
on the fourth line, separated by a pipe symbol.

=head2 B<backends>

(C<symlink: check_postgres_backends>) Checks the current number of connections for one or more databases, and optionally
compares it to the maximum allowed, which is determined by the
Postgres configuration variable B<max_connections>. The I<--warning> and
I<--critical> options can take one of three forms. First, a simple number can be
given, which represents the number of connections at which the alert will be
given. This choice does not use the B<max_connections> setting. Second, the
percentage of available connections can be given. Third, a negative number can
be given which represents the number of connections left until B<max_connections>
is reached. The default values for I<--warning> and I<--critical> are '90%' and '95%'.
You can also filter the databases by use of the I<--include> and I<--exclude> options.
See the L</"BASIC FILTERING"> section for more details.

To view only non-idle processes, you can use the I<--noidle> argument. Note that the
user you are connecting as must be a superuser for this to work properly.

Example 1: Give a warning when the number of connections on host quirm reaches 120, and a critical if it reaches 150.

  check_postgres_backends --host=quirm --warning=120 --critical=150

Example 2: Give a critical when we reach 75% of our max_connections setting on hosts lancre or lancre2.

  check_postgres_backends --warning='75%' --critical='75%' --host=lancre,lancre2

Example 3: Give a warning when there are only 10 more connection slots left on host plasmid, and a critical
when we have only 5 left.

  check_postgres_backends --warning=-10 --critical=-5 --host=plasmid

Example 4: Check all databases except those with "test" in their name, but allow ones that are named "pg_greatest". Connect as port 5432 on the first two hosts, and as port 5433 on the third one. We want to always throw a critical when we reach 30 or more connections.

  check_postgres_backends --dbhost=hong,kong --dbhost=fooey --dbport=5432 --dbport=5433 --warning=30 --critical=30 --exclude="~test" --include="pg_greatest,~prod"

For MRTG output, the number of connections is reported on the first line, and the fourth line gives the name of the database,
plus the current maximum_connections. If more than one database has been queried, the one with the highest number of
connections is output.

=head2 B<bloat>

(C<symlink: check_postgres_bloat>) Checks the amount of bloat in tables and indexes. (Bloat is generally the amount
of dead unused space taken up in a table or index. This space is usually reclaimed
by use of the VACUUM command.) This action requires that stats collection be
enabled on the target databases, and requires that ANALYZE is run frequently.
The I<--include> and I<--exclude> options can be used to filter out which tables
to look at. See the L</"BASIC FILTERING"> section for more details.

The I<--warning> and I<--critical> options can be specified as sizes, percents, or both.
Valid size units are bytes, kilobytes, megabytes, gigabytes, terabytes, exabytes,
petabytes, and zettabytes. You can abbreviate all of those with the first letter. Items
without units are assumed to be 'bytes'. The default values are '1 GB' and '5 GB'. The value
represents the number of "wasted bytes", or the difference between what is actually
used by the table and index, and what we compute that it should be.

Note that this action has two hard-coded values to avoid false alarms on
smaller relations. Tables must have at least 10 pages, and indexes at least 15,
before they can be considered by this test. If you really want to adjust these
values, you can look for the variables I<$MINPAGES> and I<$MINIPAGES> at the top of the
C<check_bloat> subroutine. These values are ignored if either I<--exclude> or
I<--include> is used.

Only the top 10 most bloated relations are shown. You can change this number by
using the I<--perflimit> option to set your own limit.

The schema named 'information_schema' is excluded from this test, as the only tables
it contains are small and do not change.

Please note that the values computed by this action are not precise, and
should be used as a guideline only. Great effort was made to estimate the
correct size of a table, but in the end it is only an estimate. The correct
index size is even more of a guess than the correct table size, but both
should give a rough idea of how bloated things are.

Example 1: Warn if any table on port 5432 is over 100 MB bloated, and critical if over 200 MB

  check_postgres_bloat --port=5432 --warning='100 M' --critical='200 M'

Example 2: Give a critical if table 'orders' on host 'sami' has more than 10 megs of bloat

  check_postgres_bloat --host=sami --include=orders --critical='10 MB'

Example 3: Give a critical if table 'q4' on database 'sales' is over 50% bloated

  check_postgres_bloat --db=sales --include=q4 --critical='50%'

Example 4: Give a critical if any table is over 20% bloated I<and> has over 150
MB of bloat:

  check_postgres_bloat --port=5432 --critical='20% and 150 M'

Example 5: Give a warning if any table is over 40% bloated I<or> has over 500 MB
of bloat:

  check_postgres_bloat --port=5432 --warning='500 M or 40%'

For MRTG output, the first line gives the highest number of wasted bytes for the tables, and the
second line gives the highest number of wasted bytes for the indexes. The fourth line gives the database
name, table name, and index name information. If you want to output the bloat ratio instead (how many
times larger the relation is compared to how large it should be), just pass in C<--mrtg=ratio>.

=head2 B<checkpoint>

(C<symlink: check_postgres_checkpoint>) Determines how long since the last checkpoint has
been run. This must run on the same server as the database that is being checked (i.e. the -h
flag will not work). This check is meant to run on a "warm standby" server that is actively
processing shipped WAL files, and is meant to check that your warm standby is truly 'warm'.
The data directory must be set, either by the environment variable C<PGDATA>, or by passing
the C<--datadir> argument. It returns the number of seconds since the last checkpoint
was run, as determined by parsing the call to C<pg_controldata>. Because of this, the
pg_controldata executable must be available in the current path. Alternatively, you can
set the environment variable C<PGCONTROLDATA> to the exact location of the pg_controldata
executable, or you can specify C<PGBINDIR> as the directory that it lives in.
It is also possible to use the special options I<--assume-prod> or
I<--assume-standby-mode>; if the mode found is not the one expected, a CRITICAL is emitted.

At least one warning or critical argument must be set.

This action requires the Date::Parse module.
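
For illustration, an invocation on the standby host itself might look like the following
(the data directory path and the thresholds, given here in seconds, are placeholders only):

  check_postgres_checkpoint --datadir=/var/lib/postgresql/8.3/main --warning=300 --critical=600
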
For MRTG or simple output, returns the number of seconds.

=head2 B<cluster_id>

(C<symlink: check_postgres_cluster_id>) Checks that the Database System Identifier
provided by pg_controldata is the same as last time you checked. This must run on the same
server as the database that is being checked (i.e. the -h flag will not work).
Either the I<--warning> or the I<--critical> option should be given, but not both. The value
of each one is the cluster identifier, an integer value. You can run with the special C<--critical=0> option
to find out an existing cluster identifier.

Example 1: Find the initial identifier

  check_postgres_cluster_id --critical=0 --datadir=/var/lib/postgresql/9.0/main

Example 2: Make sure the cluster is the same, and alert if not, using the result from above.

  check_postgres_cluster_id --critical=5633695740047915135

For MRTG output, returns a 1 or 0 indicating success or failure of the identifier to match. An
identifier must be provided as the C<--mrtg> argument. The fourth line always gives the
current identifier.

=head2 B<commitratio>

(C<symlink: check_postgres_commitratio>) Checks the commit ratio of all databases and complains when they are too low.
There is no need to run this command more than once per database cluster.
Databases can be filtered with
the I<--include> and I<--exclude> options. See the L</"BASIC FILTERING"> section
for more details.
They can also be filtered by the owner of the database with the
I<--includeuser> and I<--excludeuser> options.
See the L</"USER NAME FILTERING"> section for more details.

The warning and critical options should be specified as percentages. There are no
defaults for this action: the warning and critical must be specified. The warning value
cannot be greater than the critical value. The output returns all databases sorted by
commitratio, smallest first.

Example: Warn if any database on host flagg is less than 90% in commitratio, and critical if less than 80%.

  check_postgres_commitratio --host=flagg --warning='90%' --critical='80%'

For MRTG output, returns the percentage of the database with the smallest commitratio on the first line,
and the name of the database on the fourth line.

=head2 B<connection>

(C<symlink: check_postgres_connection>) Simply connects, issues a 'SELECT version()', and leaves.
Takes no I<--warning> or I<--critical> options.
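
For illustration, a minimal invocation against a local cluster might look like this (the port
number is a placeholder):

  check_postgres_connection --port=5432
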
For MRTG output, simply outputs a 1 (good connection) or a 0 (bad connection) on the first line.

=head2 B<custom_query>

(C<symlink: check_postgres_custom_query>) Runs a custom query of your choosing, and parses the results.
The query itself is passed in through the C<query> argument, and should be kept as simple as possible.
If at all possible, wrap it in a view or a function to keep things easier to manage. The query should
return one or two columns. It is required that one of the columns be named "result" and is the item
that will be checked against your warning and critical values. The second column is for the performance
data and any name can be used: this will be the 'value' inside the performance data section.

At least one warning or critical argument must be specified. What these are set to depends on the type of
query you are running. There are four types of custom_queries that can be run, specified by the C<valtype>
argument. If none is specified, this action defaults to 'integer'. The four types are:

B<integer>:
Does a simple integer comparison. The first column should be a simple integer, and the warning and
critical values should be the same.

B<string>:
The warning and critical are strings, and are triggered only if the value in the first column matches
it exactly. This is case-sensitive.

B<time>:
The warning and the critical are times, and can have units of seconds, minutes, hours, or days.
Each may be written singular or abbreviated to just the first letter. If no units are given,
seconds are assumed. The first column should be an integer representing the number of seconds
to check.

B<size>:
The warning and the critical are sizes, and can have units of bytes, kilobytes, megabytes, gigabytes,
terabytes, or exabytes. Each may be abbreviated to the first letter. If no units are given,
bytes are assumed. The first column should be an integer representing the number of bytes to check.

Normally, an alert is triggered if the values returned are B<greater than> or equal to the critical or warning
value. However, an option of I<--reverse> will trigger the alert if the returned value is
B<lower than> or equal to the critical or warning value.

Example 1: Warn if any relation over 100 pages is named "rad", and put the number of pages
inside the performance data section.

  check_postgres_custom_query --valtype=string -w "rad" --query=
    "SELECT relname AS result, relpages AS pages FROM pg_class WHERE relpages > 100"

Example 2: Give a critical if the "foobar" function returns a number over 5MB:

  check_postgres_custom_query --critical='5MB' --valtype=size --query="SELECT foobar() AS result"

Example 3: Give a critical if the function "snazzo" returns less than 42:

  check_postgres_custom_query --critical=42 --query="SELECT snazzo() AS result" --reverse

If you come up with a useful custom_query, consider sending in a patch to this program
to make it into a standard action that other people can use.

This action does not support MRTG or simple output yet.

=head2 B<database_size>

(C<symlink: check_postgres_database_size>) Checks the size of all databases and complains when they are too big.
There is no need to run this command more than once per database cluster.
Databases can be filtered with
the I<--include> and I<--exclude> options. See the L</"BASIC FILTERING"> section
for more details.
They can also be filtered by the owner of the database with the
I<--includeuser> and I<--excludeuser> options.
See the L</"USER NAME FILTERING"> section for more details.

The warning and critical options can be specified as bytes, kilobytes, megabytes,
gigabytes, terabytes, or exabytes. Each may be abbreviated to the first letter as well.
If no unit is given, the units are assumed to be bytes. There are no defaults for this
action: the warning and critical must be specified. The warning value cannot be greater
than the critical value. The output returns all databases sorted by size largest first,
showing both raw bytes and a "pretty" version of the size.

Example 1: Warn if any database on host flagg is over 1 TB in size, and critical if over 1.1 TB.

  check_postgres_database_size --host=flagg --warning='1 TB' --critical='1.1 t'

Example 2: Give a critical if the database template1 on port 5432 is over 10 MB.

  check_postgres_database_size --port=5432 --include=template1 --warning='10MB' --critical='10MB'

Example 3: Give a warning if any database on host 'tardis' owned by the user 'tom' is over 5 GB

  check_postgres_database_size --host=tardis --includeuser=tom --warning='5 GB' --critical='10 GB'

For MRTG output, returns the size in bytes of the largest database on the first line,
and the name of the database on the fourth line.

=head2 B<dbstats>

(C<symlink: check_postgres_dbstats>) Reports information from the pg_stat_database view,
and outputs it in a Cacti-friendly manner. No other output is supported, as the output
is informational and does not lend itself to alerts, such as those used with Nagios. If no
options are given, all databases are returned, one per line. You can include a specific
database by use of the C<--include> option, or you can use the C<--dbname> option.
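
For illustration, either of the following forms could be used to report on just one database
("products" is a placeholder name):

  check_postgres_dbstats --include=products
  check_postgres_dbstats --dbname=products
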
Eleven items are returned on each line, in the format name:value, separated by a single
space. The items are:

=over 4

=item backends

The number of currently running backends for this database.

=item commits

The total number of commits for this database since it was created or reset.

=item rollbacks

The total number of rollbacks for this database since it was created or reset.

=item read

The total number of disk blocks read.

=item hit

The total number of buffer hits.

=item ret

The total number of rows returned.

=item fetch

The total number of rows fetched.

=item ins

The total number of rows inserted.

=item upd

The total number of rows updated.

=item del

The total number of rows deleted.

=item dbname

The name of the database.

=back

Note that ret, fetch, ins, upd, and del items will always be 0 if Postgres is version 8.2 or lower, as those stats were
not available in those versions.

If the dbname argument is given, seven additional items are returned:

=over 4

=item idxscan

Total number of user index scans.

=item idxtupread

Total number of user index entries returned.

=item idxtupfetch

Total number of rows fetched by simple user index scans.

=item idxblksread

Total number of disk blocks read for all user indexes.

=item idxblkshit

Total number of buffer hits for all user indexes.

=item seqscan

Total number of sequential scans against all user tables.

=item seqtupread

Total number of tuples returned from all user tables.

=back

Example 1: Grab the stats for a database named "products" on host "willow":

  check_postgres_dbstats --dbhost willow --dbname products

The output returned will be like this (all on one line, not wrapped):

  backends:82 commits:58374408 rollbacks:1651 read:268435543 hit:2920381758 idxscan:310931294 idxtupread:2777040927
  idxtupfetch:1840241349 idxblksread:62860110 idxblkshit:1107812216 seqscan:5085305 seqtupread:5370500520
  ret:0 fetch:0 ins:0 upd:0 del:0 dbname:products

=head2 B<disabled_triggers>

(C<symlink: check_postgres_disabled_triggers>) Checks on the number of disabled triggers inside the database.
The I<--warning> and I<--critical> options are the number of such triggers found, and both
default to "1", as in normal usage having disabled triggers is a dangerous event. If the
database being checked is 8.3 or higher, the check is for the number of triggers that are
in a 'disabled' status (as opposed to being 'always' or 'replica'). The output will show
the name of the table and the name of the trigger for each disabled trigger.

Example 1: Make sure that there are no disabled triggers

  check_postgres_disabled_triggers

For MRTG output, returns the number of disabled triggers on the first line.

=head2 B<disk_space>

(C<symlink: check_postgres_disk_space>) Checks on the available physical disk space used by Postgres. This action requires
that you have the executable "/bin/df" available to report on disk sizes, and it
also needs to be run as a superuser, so it can examine the B<data_directory>
setting inside of Postgres. The I<--warning> and I<--critical> options are
given in either sizes or percentages, or both. If using sizes, the standard unit types
are allowed: bytes, kilobytes, megabytes, gigabytes, terabytes, or
exabytes. Each may be abbreviated to the first letter only; no units at all
indicates 'bytes'. The default values are '90%' and '95%'.

This command checks the following things to determine all of the different
physical disks being used by Postgres.

B<data_directory> - The disk that the main data directory is on.

B<log directory> - The disk that the log files are on.

B<WAL file directory> - The disk that the write-ahead logs are on (e.g. symlinked pg_xlog)

B<tablespaces> - Each tablespace that is on a separate disk.

The output shows the total size used and available on each disk, as well as
the percentage, ordered by highest to lowest percentage used. Each item above
maps to a file system: these can be included or excluded. See the
L</"BASIC FILTERING"> section for more details.

Example 1: Make sure that no file system is over 90% for the database on port 5432.

  check_postgres_disk_space --port=5432 --warning='90%' --critical='90%'

Example 2: Check that all file systems starting with /dev/sda are smaller than 10 GB and 11 GB (warning and critical)

  check_postgres_disk_space --port=5432 --warning='10 GB' --critical='11 GB' --include="~^/dev/sda"

Example 3: Make sure that no file system is both over 50% I<and> has over 15 GB

  check_postgres_disk_space --critical='50% and 15 GB'

Example 4: Issue a warning if any file system is either over 70% full I<or> has
more than 1T

  check_postgres_disk_space --warning='1T or 70%'

For MRTG output, returns the size in bytes of the file system on the first line,
and the name of the file system on the fourth line.

=head2 B<fsm_pages>

(C<symlink: check_postgres_fsm_pages>) Checks how close a cluster is to the Postgres B<max_fsm_pages> setting.
This action will only work for databases of 8.2 or higher, and it requires the contrib
module B<pg_freespacemap> be installed. The I<--warning> and I<--critical> options should be expressed
as percentages. The number of used pages in the free-space-map is determined by looking in the
pg_freespacemap_relations view, and running a formula based on the formula used for
outputting free-space-map pageslots in the vacuum verbose command. The default values are B<85%> for the
warning and B<95%> for the critical.

Example 1: Give a warning when our cluster has used up 76% of the free-space pageslots, with pg_freespacemap installed in database robert

  check_postgres_fsm_pages --dbname=robert --warning="76%"

While you need to pass in the name of the database where pg_freespacemap is installed, you only need to run this check once per cluster. Also, checking this information does require obtaining special locks on the free-space-map, so it is recommended that you do not run this check at short intervals.

For MRTG output, returns the percent of free-space-map on the first line, and the number of pages currently used on
the second line.

=head2 B<fsm_relations>

(C<symlink: check_postgres_fsm_relations>) Checks how close a cluster is to the Postgres B<max_fsm_relations> setting.
This action will only work for databases of 8.2 or higher, and it requires the contrib module B<pg_freespacemap> be
installed. The I<--warning> and I<--critical> options should be expressed as percentages. The number of used relations
in the free-space-map is determined by looking in the pg_freespacemap_relations view. The default values are B<85%> for
the warning and B<95%> for the critical.

Example 1: Give a warning when our cluster has used up 80% of the free-space relations, with pg_freespacemap installed in database dylan

  check_postgres_fsm_relations --dbname=dylan --warning="80%"

While you need to pass in the name of the database where pg_freespacemap is installed, you only need to run this check
once per cluster. Also,
checking this information does require obtaining special locks on the free-space-map, so it is recommended that you do not
run this check at short intervals.

For MRTG output, returns the percent of free-space-map on the first line, the number of relations currently used on
the second line.

=head2 B<hitratio>

(C<symlink: check_postgres_database_hitratio>) Checks the hit ratio of all databases and complains when they are too low.
There is no need to run this command more than once per database cluster.
Databases can be filtered with
the I<--include> and I<--exclude> options. See the L</"BASIC FILTERING"> section
for more details.
They can also be filtered by the owner of the database with the
I<--includeuser> and I<--excludeuser> options.
See the L</"USER NAME FILTERING"> section for more details.

The warning and critical options should be specified as percentages. There are no
defaults for this action: the warning and critical must be specified. The warning value
cannot be greater than the critical value. The output returns all databases sorted by
hitratio, smallest first.

Example: Warn if any database on host flagg is less than 90% in hitratio, and critical if less than 80%.

  check_postgres_database_hitratio --host=flagg --warning='90%' --critical='80%'

For MRTG output, returns the percentage of the database with the smallest hitratio on the first line,
and the name of the database on the fourth line.

=head2 B<hot_standby_delay>

(C<symlink: check_postgres_hot_standby_delay>) Checks the streaming replication lag by computing the delta
between the xlog position of a master server and that of the slaves connected to it. The slave
server must be in hot_standby (i.e. read-only) mode, therefore the minimum version to use this
action is Postgres 9.0. The I<--warning> and I<--critical> options are the delta between the xlog
locations. These values should match the volume of transactions needed to have the streaming
replication disconnect from the master because of too much lag.
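
For illustration, a comparison of a master against one replica might look like the following
(the host names and the byte thresholds are placeholders only):

  check_postgres_hot_standby_delay --dbhost=master --dbhost2=replica1 --warning=1048576 --critical=16777216
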
You must provide information on how to reach the second database by a connection
parameter ending in the number 2, such as "--dbport2=5543". If it is not given,
the action fails.

=head2 B<index_size>

=head2 B<table_size>

=head2 B<relation_size>

(symlinks: C<check_postgres_index_size>, C<check_postgres_table_size>, and C<check_postgres_relation_size>)
The actions B<table_size> and B<index_size> are simply variations of the
B<relation_size> action, which checks for a relation that has grown too big.
Relations (in other words, tables and indexes) can be filtered with the
I<--include> and I<--exclude> options. See the L</"BASIC FILTERING"> section
for more details. Relations can also be filtered by the user that owns them,
by using the I<--includeuser> and I<--excludeuser> options.
See the L</"USER NAME FILTERING"> section for more details.

The values for the I<--warning> and I<--critical> options are file sizes, and
may have units of bytes, kilobytes, megabytes, gigabytes, terabytes, or exabytes.
Each can be abbreviated to the first letter. If no units are given, bytes are
assumed. There are no default values: both the warning and the critical option
must be given. The return text shows the size of the largest relation found.

If the I<--showperf> option is enabled, I<all> of the relations with their sizes
will be given. To prevent this, it is recommended that you set the
I<--perflimit> option, which will cause the query to do a
C<ORDER BY size DESC LIMIT (perflimit)>.

Example 1: Give a critical if any table is larger than 600MB on host burrick.

  check_postgres_table_size --critical='600 MB' --warning='600 MB' --host=burrick

Example 2: Warn if the table products is over 4 GB in size, and give a critical at 4.5 GB.

  check_postgres_table_size --host=burrick --warning='4 GB' --critical='4.5 GB' --include=products

Example 3: Warn if any index not owned by postgres goes over 500 MB.

  check_postgres_index_size --port=5432 --excludeuser=postgres -w 500MB -c 600MB

For MRTG output, returns the size in bytes of the largest relation, and the name of the database
and relation as the fourth line.

=head2 B<last_analyze>

=head2 B<last_vacuum>

=head2 B<last_autoanalyze>

=head2 B<last_autovacuum>

(symlinks: C<check_postgres_last_analyze>, C<check_postgres_last_vacuum>,
C<check_postgres_last_autoanalyze>, and C<check_postgres_last_autovacuum>)
Checks how long it has been since vacuum (or analyze) was last run on each
table in one or more databases. Use of these actions requires that the target
database is version 8.3 or greater, or that the version is 8.2 and the
configuration variable B<stats_row_level> has been enabled. Tables can be filtered with the
I<--include> and I<--exclude> options. See the L</"BASIC FILTERING"> section
for more details.
Tables can also be filtered by their owner by use of the
I<--includeuser> and I<--excludeuser> options.
See the L</"USER NAME FILTERING"> section for more details.

The units for I<--warning> and I<--critical> are specified as times.
Valid units are seconds, minutes, hours, and days; all can be abbreviated
to the first letter. If no units are given, 'seconds' are assumed. The
default values are '1 day' and '2 days'. Please note that there are cases
in which this field does not get automatically populated. If certain tables
are giving you problems, make sure that they have dead rows to vacuum,
or just exclude them from the test.

The schema named 'information_schema' is excluded from this test, as the only tables
it contains are small and do not change.

Note that the non-'auto' versions will also check the auto versions. In other words,
using last_vacuum will report on the last vacuum, whether it was a normal vacuum, or
one run by the autovacuum daemon.

Example 1: Warn if any table has not been vacuumed in 3 days, and give a
critical at a week, for host wormwood

  check_postgres_last_vacuum --host=wormwood --warning='3d' --critical='7d'

Example 2: Same as above, but skip tables belonging to the users 'eve' or 'mallory'

  check_postgres_last_vacuum --host=wormwood --warning='3d' --critical='7d' --excludeuser=eve,mallory

For MRTG output, returns (on the first line) the LEAST amount of time in seconds since a table was
last vacuumed or analyzed. The fourth line returns the name of the database and name of the table.

=head2 B<listener>

(C<symlink: check_postgres_listener>) Confirm that someone is listening for one or more
specific strings (using the LISTEN/NOTIFY system), by looking at the pg_listener table.
Only one of warning or critical is needed. The format is a simple string representing the
LISTEN target, or a tilde character followed by a string for a regular expression check.
Note that this check will not work on versions of Postgres 9.0 or higher.

Example 1: Give a warning if nobody is listening for the string bucardo_mcp_ping on ports 5555 and 5556

  check_postgres_listener --port=5555,5556 --warning=bucardo_mcp_ping

Example 2: Give a critical if there are no active LISTEN requests matching 'grimm' on database oskar

  check_postgres_listener --db oskar --critical=~grimm

For MRTG output, returns a 1 or a 0 on the first line, indicating success or failure. The name of the notice must
be provided via the I<--mrtg> option.

=head2 B<locks>

(C<symlink: check_postgres_locks>) Check the total number of locks on one or more databases. There is no
need to run this more than once per database cluster. Databases can be filtered
with the I<--include> and I<--exclude> options. See the L</"BASIC FILTERING"> section
for more details.

The I<--warning> and I<--critical> options can be specified as simple numbers,
which represent the total number of locks, or they can be broken down by type of lock.
Valid lock names are C<'total'>, C<'waiting'>, or the name of a lock type used by Postgres.
These names are case-insensitive and do not need the "lock" part on the end,
so B<exclusive> will match 'ExclusiveLock'. The format is name=number, with different
items separated by semicolons.

Example 1: Warn if the number of locks is 100 or more, and critical if 200 or more, on host garrett

  check_postgres_locks --host=garrett --warning=100 --critical=200

Example 2: On the host artemus, warn if 200 or more locks exist, and give a critical if over 250 total locks exist, or if over 20 exclusive locks exist, or if over 5 connections are waiting for a lock.

  check_postgres_locks --host=artemus --warning=200 --critical="total=250;waiting=5;exclusive=20"

For MRTG output, returns the number of locks on the first line, and the name of the database on the fourth line.

=head2 B<logfile>

(C<symlink: check_postgres_logfile>) Ensures that the logfile is in the expected location and is being logged to.
This action issues a command that throws an error on each database it is
checking, and ensures that the message shows up in the logs. It scans the
various log_* settings inside of Postgres to figure out where the logs should be.
If you are using syslog, it does a rough (but not foolproof) scan of
F</etc/syslog.conf>. Alternatively, you can provide the name of the logfile
with the I<--logfile> option. This is especially useful if the logs have a
custom rotation scheme driven by an external program. The B<--logfile> option
supports the following escape characters: C<%Y %m %d %H>, which represent
the current year, month, date, and hour respectively. An error is always
reported as critical unless the warning option has been passed in as a non-zero
value. Other than that specific usage, the C<--warning> and C<--critical>
options should I<not> be used.

Example 1: On port 5432, ensure the logfile is being written to the file /home/greg/pg8.2.log

  check_postgres_logfile --port=5432 --logfile=/home/greg/pg8.2.log

Example 2: Same as above, but raise a warning, not a critical

  check_postgres_logfile --port=5432 --logfile=/home/greg/pg8.2.log -w 1

For MRTG output, returns a 1 or 0 on the first line, indicating success or failure. In case of a
failure, the fourth line will provide more detail on the failure encountered.

=head2 B<new_version_bc>

(C<symlink: check_postgres_new_version_bc>) Checks if a newer version of the Bucardo
program is available. The current version is obtained by running C<bucardo_ctl --version>.
If a major upgrade is available, a warning is returned. If a revision upgrade is
available, a critical is returned. (Bucardo is a master-to-slave and master-to-master
replication system for Postgres: see http://bucardo.org for more information).
See also the information on the C<--get_method> option.

=head2 B<new_version_box>

(C<symlink: check_postgres_new_version_box>) Checks if a newer version of the boxinfo
program is available. The current version is obtained by running C<boxinfo.pl --version>.
If a major upgrade is available, a warning is returned. If a revision upgrade is
available, a critical is returned. (boxinfo is a program for grabbing important
information from a server and putting it into an HTML format: see
http://bucardo.org/wiki/boxinfo for more information). See also the information on
the C<--get_method> option.

=head2 B<new_version_cp>

(C<symlink: check_postgres_new_version_cp>) Checks if a newer version of this program
(check_postgres.pl) is available, by grabbing the version from a small text file
on the project's home page. Returns a warning if the returned
version does not match the one you are running. Recommended interval to check is
once a day. See also the information on the C<--get_method> option.
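
For illustration, a once-a-day cron job could simply run the following (forcing a particular
fetch method is optional and shown only as an example):

  check_postgres_new_version_cp --get_method=wget
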
=head2 B<new_version_pg>

(C<symlink: check_postgres_new_version_pg>) Checks if a newer revision of Postgres
exists for each database connected to. Note that this only checks for revision, e.g.
going from 8.3.6 to 8.3.7. Revisions are always 100% binary compatible and involve no
dump and restore to upgrade. Revisions are made to address bugs, so upgrading as soon
as possible is always recommended. Returns a warning if you do not have the latest revision.
It is recommended this check is run at least once a day. See also the information on
the C<--get_method> option.


=head2 B<new_version_tnm>

(C<symlink: check_postgres_new_version_tnm>) Checks if a newer version of the
tail_n_mail program is available. The current version is obtained by running
C<tail_n_mail --version>. If a major upgrade is available, a warning is returned. If a
revision upgrade is available, a critical is returned. (tail_n_mail is a log monitoring
tool that can send mail when interesting events appear in your Postgres logs.
See: http://bucardo.org/wiki/Tail_n_mail for more information).
See also the information on the C<--get_method> option.

=head2 B<pgb_pool_cl_active>

=head2 B<pgb_pool_cl_waiting>

=head2 B<pgb_pool_sv_active>

=head2 B<pgb_pool_sv_idle>

=head2 B<pgb_pool_sv_used>

=head2 B<pgb_pool_sv_tested>

=head2 B<pgb_pool_sv_login>

=head2 B<pgb_pool_maxwait>

(symlinks: C<check_postgres_pgb_pool_cl_active>, C<check_postgres_pgb_pool_cl_waiting>,
C<check_postgres_pgb_pool_sv_active>, C<check_postgres_pgb_pool_sv_idle>,
C<check_postgres_pgb_pool_sv_used>, C<check_postgres_pgb_pool_sv_tested>,
C<check_postgres_pgb_pool_sv_login>, and C<check_postgres_pgb_pool_maxwait>)

Examines pgbouncer's pool statistics. Each pool has a set of "client"
connections, referring to connections from external clients, and "server"
connections, referring to connections to PostgreSQL itself. The related
check_postgres actions are prefixed by "cl_" and "sv_", respectively. Active
client connections are those connections currently linked with an active server
connection. Client connections may also be "waiting", meaning they have not yet
been allocated a server connection. Server connections are "active" (linked to
a client), "idle" (standing by for a client connection to link with), "used"
(just unlinked from a client, and not yet returned to the idle pool), "tested"
(currently being tested) and "login" (in the process of logging in). The
maxwait value shows how long in seconds the oldest waiting client connection
has been waiting.
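
For illustration, to alert when too many client connections are waiting for a server connection,
an invocation along these lines could be used (the port, user, and thresholds are placeholders):

  check_postgres_pgb_pool_cl_waiting -p 6432 -u pgbouncer --warning=5 --critical=20
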
=head2 B<pgbouncer_backends>

(C<symlink: check_postgres_pgbouncer_backends>) Checks the current number of
connections for one or more databases through pgbouncer, and optionally
compares it to the maximum allowed, which is determined by the pgbouncer
configuration variable B<max_client_conn>. The I<--warning> and I<--critical>
options can take one of three forms. First, a simple number can be given,
which represents the number of connections at which the alert will be given.
This choice does not use the B<max_connections> setting. Second, the
percentage of available connections can be given. Third, a negative number can
be given which represents the number of connections left until
B<max_connections> is reached. The default values for I<--warning> and
I<--critical> are '90%' and '95%'. You can also filter the databases by use
of the I<--include> and I<--exclude> options. See the L</"BASIC FILTERING">
section for more details.

To view only non-idle processes, you can use the I<--noidle> argument. Note
that the user you are connecting as must be a superuser for this to work
properly.

Example 1: Give a warning when the number of connections on host quirm reaches
120, and a critical if it reaches 150.

  check_postgres_pgbouncer_backends --host=quirm --warning=120 --critical=150 -p 6432 -u pgbouncer

Example 2: Give a critical when we reach 75% of our max_connections setting on
hosts lancre or lancre2.

  check_postgres_pgbouncer_backends --warning='75%' --critical='75%' --host=lancre,lancre2 -p 6432 -u pgbouncer

Example 3: Give a warning when there are only 10 more connection slots left on
host plasmid, and a critical when we have only 5 left.

  check_postgres_pgbouncer_backends --warning=-10 --critical=-5 --host=plasmid -p 6432 -u pgbouncer

For MRTG output, the number of connections is reported on the first line, and
the fourth line gives the name of the database, plus the current
max_client_conn. If more than one database has been queried, the one with the
highest number of connections is output.

=head2 B<pgbouncer_checksum>

(C<symlink: check_postgres_pgbouncer_checksum>) Checks that all the
pgBouncer settings are the same as last time you checked.
This is done by generating a checksum of a sorted list of setting names and
their values. Note that you shouldn't specify the database name; it will
automatically default to pgbouncer. Either the I<--warning> or the I<--critical> option
should be given, but not both. The value of each one is the checksum, a
32-character hexadecimal value. You can run with the special C<--critical=0> option
to find out an existing checksum.

This action requires the Digest::MD5 module.

Example 1: Find the initial checksum for pgbouncer configuration on port 6432 using the default user (usually postgres)

  check_postgres_pgbouncer_checksum --port=6432 --critical=0

Example 2: Make sure no settings have changed and warn if so, using the checksum from above.

  check_postgres_pgbouncer_checksum --port=6432 --warning=cd2f3b5e129dc2b4f5c0f6d8d2e64231

For MRTG output, returns a 1 or 0 indicating success or failure of the checksum to match. A
checksum must be provided as the C<--mrtg> argument. The fourth line always gives the
current checksum.

=head2 B<prepared_txns>

(C<symlink: check_postgres_prepared_txns>) Check on the age of any existing prepared transactions.
Note that most people will NOT use prepared transactions, as they are part of two-phase commit
and complicated to maintain. They should also not be confused with prepared STATEMENTS, which are
what most people think of when they hear prepare. The default value for a warning is 1 second, to
detect any use of prepared transactions, which is probably a mistake on most systems. Warning and
critical are the number of seconds a prepared transaction has been open before an alert is given.

Example 1: Give a warning on detecting any prepared transactions:

  check_postgres_prepared_txns -w 0

Example 2: Give a critical if any prepared transaction has been open longer than 10 seconds, but allow
up to 360 seconds for the database 'shrike':

  check_postgres_prepared_txns --critical=10 --exclude=shrike
  check_postgres_prepared_txns --critical=360 --include=shrike

For MRTG output, returns the number of seconds the oldest transaction has been open as the first line,
and which database it came from as the final line.

=head2 B<query_runtime>

(C<symlink: check_postgres_query_runtime>) Checks how long a specific query takes to run, by executing an "EXPLAIN ANALYZE"
against it. The I<--warning> and I<--critical> options are the maximum amount of
time the query should take. Valid units are seconds, minutes, and hours; any can be
abbreviated to the first letter. If no units are given, 'seconds' are assumed.
Both the warning and the critical option must be given. The name of the view or
function to be run must be passed in to the I<--queryname> option. It must consist
of a single word (or schema.word), with optional parens at the end.

Example 1: Give a critical if the function named "speedtest" fails to run in 10 seconds or less.

  check_postgres_query_runtime --queryname='speedtest()' --critical=10 --warning=10

For MRTG output, reports the time in seconds for the query to complete on the first line. The fourth
line lists the database.

=head2 B<query_time>

(C<symlink: check_postgres_query_time>) Checks the length of running queries on one or more databases.
There is no need to run this more than once on the same database cluster. Note that
this already excludes queries that are "idle in transaction". Databases can be filtered
by using the I<--include> and I<--exclude> options. See the L</"BASIC FILTERING">
section for more details. You can also filter on the user running the
query with the I<--includeuser> and I<--excludeuser> options.
See the L</"USER NAME FILTERING"> section for more details.

The values for the I<--warning> and I<--critical> options are amounts of
time, and default to '2 minutes' and '5 minutes' respectively. Valid units
are 'seconds', 'minutes', 'hours', or 'days'. Each may be written singular or
|
|||
|
abbreviated to just the first letter. If no units are given, the unit is
|
|||
|
assumed to be seconds.
|
|||
|
|
|||
|
This action requires Postgres 8.1 or better.
|
|||
|
|
|||
|
Example 1: Give a warning if any query has been running longer than 3 minutes, and a critical if longer than 5 minutes.
|
|||
|
|
|||
|
check_postgres_query_time --port=5432 --warning='3 minutes' --critical='5 minutes'
|
|||
|
|
|||
|
Example 2: Using default values (2 and 5 minutes), check all databases except those starting with 'template'.
|
|||
|
|
|||
|
check_postgres_query_time --port=5432 --exclude=~^template
|
|||
|
|
|||
|
Example 3: Warn if user 'don' has a query running over 20 seconds
|
|||
|
|
|||
|
check_postgres_query_time --port=5432 --includeuser=don --warning=20s
|
|||
|
|
|||
|
For MRTG output, returns the length in seconds of the longest running query on the first line. The fourth
|
|||
|
line gives the name of the database.
|
|||
|
|
|||
|
=head2 B<replicate_row>
|
|||
|
|
|||
|
(C<symlink: check_postgres_replicate_row>) Checks that master-slave replication is working to one or more slaves.
|
|||
|
The slaves are specified the same as the normal databases, except with
|
|||
|
the number 2 at the end of them, so "--port2" instead of "--port", etc.
|
|||
|
The values for the I<--warning> and I<--critical> options are units of time, and
|
|||
|
at least one must be provided (no defaults). Valid units are 'seconds', 'minutes', 'hours',
|
|||
|
or 'days'. Each may be written singular or abbreviated to just the first letter.
|
|||
|
If no units are given, the units are assumed to be seconds.
|
|||
|
|
|||
|
This check updates a single row on the master, and then measures how long it
|
|||
|
takes to be applied to the slaves. To do this, you need to pick a table that
|
|||
|
is being replicated, then find a row that can be changed, and is not going
|
|||
|
to be changed by any other process. A specific column of this row will be changed
|
|||
|
from one value to another. All of this is fed to the C<repinfo> option, and should
|
|||
|
contain the following options, separated by commas: table name, primary key, key id,
|
|||
|
column, first value, second value.
|
|||
|
|
|||
|
Example 1: Slony is replicating a table named 'orders' from host 'alpha' to
|
|||
|
host 'beta', in the database 'sales'. The primary key of the table is named
|
|||
|
id, and we are going to test the row with an id of 3 (which is historical and
|
|||
|
never changed). There is a column named 'salesrep' that we are going to toggle
|
|||
|
from a value of 'slon' to 'nols' to check on the replication. We want to throw
|
|||
|
a warning if the replication does not happen within 10 seconds.
|
|||
|
|
|||
|
check_postgres_replicate_row --host=alpha --dbname=sales --host2=beta
|
|||
|
--dbname2=sales --warning=10 --repinfo=orders,id,3,salesrep,slon,nols
|
|||
|
|
|||
|
Example 2: Bucardo is replicating a table named 'receipt' from host 'green'
|
|||
|
to hosts 'red', 'blue', and 'yellow'. The database for both sides is 'public'.
|
|||
|
The slave databases are running on port 5455. The primary key is named 'receipt_id',
|
|||
|
the row we want to use has a value of 9, and the column we want to change for the
|
|||
|
test is called 'zone'. We'll toggle between 'north' and 'south' for the value of
|
|||
|
this column, and throw a critical if the change is not on all three slaves within 5 seconds.
|
|||
|
|
|||
|
check_postgres_replicate_row --host=green --port2=5455 --host2=red,blue,yellow
|
|||
|
--critical=5 --repinfo=receipt,receipt_id,9,zone,north,south
|
|||
|
|
|||
|
For MRTG output, returns on the first line the time in seconds the replication takes to finish.
|
|||
|
The maximum time is set to 4 minutes 30 seconds: if no replication has taken place in that long
|
|||
|
a time, an error is thrown.
|
|||
|
|
|||
|
=head2 B<same_schema>
|
|||
|
|
|||
|
(C<symlink: check_postgres_same_schema>) Verifies that two or more databases are identical as far as their
|
|||
|
schema (but not the data within). This is particularly handy for making sure your slaves have not
|
|||
|
been modified or corrupted in any way when using master to slave replication. Unlike most other
|
|||
|
actions, this has no warning or critical criteria - the databases are either in sync, or are not.
|
|||
|
If they are different, a detailed list of the differences is presented.
|
|||
|
|
|||
|
You may want to exclude or filter out certain differences. The way to do this is to add strings
|
|||
|
to the C<--filter> option. To exclude a type of object, use "noname", where 'name' is the type of
|
|||
|
object, for example, "noschema". To exclude objects of a certain type by a regular expression against
|
|||
|
their name, use "noname=regex". See the examples below for a better understanding.
|
|||
|
|
|||
|
The types of objects that can be filtered include:
|
|||
|
|
|||
|
=over 4
|
|||
|
|
|||
|
=item user
|
|||
|
|
|||
|
=item schema
|
|||
|
|
|||
|
=item table
|
|||
|
|
|||
|
=item view
|
|||
|
|
|||
|
=item index
|
|||
|
|
|||
|
=item sequence
|
|||
|
|
|||
|
=item constraint
|
|||
|
|
|||
|
=item trigger
|
|||
|
|
|||
|
=item function
|
|||
|
|
|||
|
=back
|
|||
|
|
|||
|
The filter option "noposition" prevents verification of the position of
|
|||
|
columns within a table.
|
|||
|
|
|||
|
The filter option "nofuncbody" prevents comparison of the bodies of all
|
|||
|
functions.
|
|||
|
|
|||
|
The filter option "noperm" prevents comparison of object permissions.
|
|||
|
|
|||
|
To provide the second database, append its connection value to that of the first
database in the appropriate connection argument. For example, to compare
databases on hosts alpha and bravo, use "--dbhost=alpha,bravo". Also see the
examples below.
|
|||
|
|
|||
|
If only a single host is given, it is assumed we are doing a "time-based" report.
|
|||
|
The first time this is run a snapshot of all the items in the database is
|
|||
|
saved to a local file. When you run it again, that snapshot is read in and
|
|||
|
becomes "database #2" and is compared to the current database.
|
|||
|
|
|||
|
To replace the old stored file with the new version, use the --replace argument.
|
|||
|
|
|||
|
To enable snapshots at various points in time, you can use the "--suffix"
|
|||
|
argument to make the filenames unique to each run. See the examples below.
|
|||
|
|
|||
|
Example 1: Verify that two databases on hosts star and line are the same:
|
|||
|
|
|||
|
check_postgres_same_schema --dbhost=star,line
|
|||
|
|
|||
|
Example 2: Same as before, but exclude any triggers with "slony" in their name
|
|||
|
|
|||
|
check_postgres_same_schema --dbhost=star,line --filter="notrigger=slony"
|
|||
|
|
|||
|
Example 3: Same as before, but also exclude all indexes
|
|||
|
|
|||
|
check_postgres_same_schema --dbhost=star,line --filter="notrigger=slony noindexes"
|
|||
|
|
|||
|
Example 4: Check differences for the database "battlestar" on different ports
|
|||
|
|
|||
|
check_postgres_same_schema --dbname=battlestar --dbport=5432,5544
|
|||
|
|
|||
|
Example 5: Create a daily and weekly snapshot file
|
|||
|
|
|||
|
check_postgres_same_schema --dbname=cylon --suffix=daily
|
|||
|
check_postgres_same_schema --dbname=cylon --suffix=weekly
|
|||
|
|
|||
|
Example 6: Run a historical comparison, then replace the file
|
|||
|
|
|||
|
check_postgres_same_schema --dbname=cylon --suffix=daily --replace
|
|||
|
|
|||
|
=head2 B<sequence>
|
|||
|
|
|||
|
(C<symlink: check_postgres_sequence>) Checks how much room is left on all sequences in the database.
|
|||
|
This is measured as the percent of total possible values that have been used for each sequence.
|
|||
|
The I<--warning> and I<--critical> options should be expressed as percentages. The default values
|
|||
|
are B<85%> for the warning and B<95%> for the critical. You may use --include and --exclude to
|
|||
|
control which sequences are to be checked. Note that this check does account for unusual B<minvalue>
|
|||
|
and B<increment by> values, but does not care if the sequence is set to cycle or not.
|
|||
|
|
|||
|
The output for Nagios gives the name of the sequence, the percentage used, and the number of 'calls'
|
|||
|
left, indicating how many more times nextval can be called on that sequence before running into
|
|||
|
the maximum value.
|
|||
|
|
|||
|
The output for MRTG returns the highest percentage across all sequences on the first line, and
|
|||
|
the name of each sequence with that percentage on the fourth line, separated by a "|" (pipe)
|
|||
|
if there is more than one sequence at that percentage.
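
For example (illustrative only), MRTG output for this action is selected with the C<--output> option:

  check_postgres_sequence --dbport=5432 --output=mrtg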
|
|||
|
|
|||
|
Example 1: Give a warning if any sequences are approaching 95% full.
|
|||
|
|
|||
|
check_postgres_sequence --dbport=5432 --warning=95%
|
|||
|
|
|||
|
Example 2: Check that the sequence named "orders_id_seq" is not more than half full.
|
|||
|
|
|||
|
check_postgres_sequence --dbport=5432 --critical=50% --include=orders_id_seq
|
|||
|
|
|||
|
=head2 B<settings_checksum>
|
|||
|
|
|||
|
(C<symlink: check_postgres_settings_checksum>) Checks that all the Postgres settings are the same as last time you checked.
|
|||
|
This is done by generating a checksum of a sorted list of setting names and
|
|||
|
their values. Note that different users in the same database may have different
|
|||
|
checksums, due to ALTER USER usage, and due to the fact that superusers see more
|
|||
|
settings than ordinary users. Either the I<--warning> or the I<--critical> option
|
|||
|
should be given, but not both. The value of each one is the checksum, a
|
|||
|
32-character hexadecimal value. You can run with the special C<--critical=0> option
|
|||
|
to find out an existing checksum.
|
|||
|
|
|||
|
This action requires the Digest::MD5 module.
|
|||
|
|
|||
|
Example 1: Find the initial checksum for the database on port 5555 using the default user (usually postgres)
|
|||
|
|
|||
|
check_postgres_settings_checksum --port=5555 --critical=0
|
|||
|
|
|||
|
Example 2: Make sure no settings have changed and warn if so, using the checksum from above.
|
|||
|
|
|||
|
check_postgres_settings_checksum --port=5555 --warning=cd2f3b5e129dc2b4f5c0f6d8d2e64231
|
|||
|
|
|||
|
For MRTG output, returns a 1 or 0 indicating success or failure of the checksum match. A
|
|||
|
checksum must be provided as the C<--mrtg> argument. The fourth line always gives the
|
|||
|
current checksum.
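
For example (an illustrative invocation, reusing the checksum from Example 2 above):

  check_postgres_settings_checksum --port=5555 --output=mrtg --mrtg=cd2f3b5e129dc2b4f5c0f6d8d2e64231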
|
|||
|
|
|||
|
=head2 B<slony_status>
|
|||
|
|
|||
|
(C<symlink: check_postgres_slony_status>) Checks the status of a Slony cluster by looking
|
|||
|
at the results of Slony's sl_status view. This is returned as the number of seconds of "lag time".
|
|||
|
The I<--warning> and I<--critical> options should be expressed as times. The default values
|
|||
|
are B<60 seconds> for the warning and B<300 seconds> for the critical.
|
|||
|
|
|||
|
The optional argument I<--schema> indicates the schema that Slony is installed under. If it is
|
|||
|
not given, the schema will be determined automatically each time this check is run.
|
|||
|
|
|||
|
Example 1: Give a warning if any Slony node is lagged by more than 20 seconds
|
|||
|
|
|||
|
check_postgres_slony_status --warning 20
|
|||
|
|
|||
|
Example 2: Give a critical if Slony, installed under the schema "_slony", is over 10 minutes lagged
|
|||
|
|
|||
|
check_postgres_slony_status --schema=_slony --critical=600
|
|||
|
|
|||
|
=head2 B<timesync>
|
|||
|
|
|||
|
(C<symlink: check_postgres_timesync>) Compares the local system time with the time reported by one or more databases.
|
|||
|
The I<--warning> and I<--critical> options represent the number of seconds between
|
|||
|
the two systems before an alert is given. If neither is specified, the default values
|
|||
|
are used, which are '2' and '5'. The warning value cannot be greater than the critical
|
|||
|
value. Due to the non-exact nature of this test, values of '0' or '1' are not recommended.
|
|||
|
|
|||
|
The string returned shows the time difference as well as the time on each side written out.
|
|||
|
|
|||
|
Example 1: Check that databases on hosts ankh, morpork, and klatch are no more than 3 seconds off from the local time:
|
|||
|
|
|||
|
check_postgres_timesync --host=ankh,morpork,klatch --critical=3
|
|||
|
|
|||
|
For MRTG output, returns on the first line the number of seconds difference between the local
|
|||
|
time and the database time. The fourth line returns the name of the database.
|
|||
|
|
|||
|
=head2 B<txn_idle>
|
|||
|
|
|||
|
(C<symlink: check_postgres_txn_idle>) Checks the number and duration of "idle
|
|||
|
in transaction" queries on one or more databases. There is no need to run this
|
|||
|
more than once on the same database cluster. Databases can be filtered by
|
|||
|
using the I<--include> and I<--exclude> options. See the L</"BASIC FILTERING">
|
|||
|
section below for more details.
|
|||
|
|
|||
|
The I<--warning> and I<--critical> options are given as units of time, as signed
integers (a count of connections), or as a count "for" a unit of time (as in
Example 3 below), and both must be provided (there are no defaults). Valid units
are 'seconds', 'minutes', 'hours', or 'days'. Each may be written singular or
abbreviated to just the first letter. If no units are given and the numbers are
unsigned, the units are assumed to be seconds.
|
|||
|
|
|||
|
This action requires Postgres 8.3 or better.
|
|||
|
|
|||
|
Example 1: Give a warning if any connection has been idle in transaction for more than 15 seconds:
|
|||
|
|
|||
|
check_postgres_txn_idle --port=5432 --warning='15 seconds'
|
|||
|
|
|||
|
Example 2: Give a warning if there are 50 or more transactions
|
|||
|
|
|||
|
check_postgres_txn_idle --port=5432 --warning='+50'
|
|||
|
|
|||
|
Example 3: Give a critical if 5 or more connections have been idle in
|
|||
|
transaction for more than 10 seconds:
|
|||
|
|
|||
|
check_postgres_txn_idle --port=5432 --critical='5 for 10 seconds'
|
|||
|
|
|||
|
For MRTG output, returns the time in seconds the longest idle transaction has been running. The fourth
|
|||
|
line returns the name of the database and other information about the longest transaction.
|
|||
|
|
|||
|
=head2 B<txn_time>
|
|||
|
|
|||
|
(C<symlink: check_postgres_txn_time>) Checks the length of open transactions on one or more databases.
|
|||
|
There is no need to run this command more than once per database cluster.
|
|||
|
Databases can be filtered by use of the
|
|||
|
I<--include> and I<--exclude> options. See the L</"BASIC FILTERING"> section
|
|||
|
for more details. The owner of the transaction can also be filtered, by use of
|
|||
|
the I<--includeuser> and I<--excludeuser> options.
|
|||
|
See the L</"USER NAME FILTERING"> section for more details.
|
|||
|
|
|||
|
The values for the I<--warning> and I<--critical> options are units of time, and
|
|||
|
must be provided (no default). Valid units are 'seconds', 'minutes', 'hours',
|
|||
|
or 'days'. Each may be written singular or abbreviated to just the first letter.
|
|||
|
If no units are given, the units are assumed to be seconds.
|
|||
|
|
|||
|
This action requires Postgres 8.3 or better.
|
|||
|
|
|||
|
Example 1: Give a critical if any transaction has been open for more than 10 minutes:
|
|||
|
|
|||
|
check_postgres_txn_time --port=5432 --critical='10 minutes'
|
|||
|
|
|||
|
Example 2: Warn if user 'warehouse' has a transaction open over 30 seconds
|
|||
|
|
|||
|
check_postgres_txn_time --port=5432 --warning=30s --includeuser=warehouse
|
|||
|
|
|||
|
For MRTG output, returns the maximum time in seconds a transaction has been open on the
|
|||
|
first line. The fourth line gives the name of the database.
|
|||
|
|
|||
|
=head2 B<txn_wraparound>
|
|||
|
|
|||
|
(C<symlink: check_postgres_txn_wraparound>) Checks how close to transaction wraparound one or more databases are getting.
|
|||
|
The I<--warning> and I<--critical> options indicate the number of transactions done, and must be positive integers.
|
|||
|
If either option is not given, the default values of 1.3 and 1.4 billion are used. There is no need to run this command
|
|||
|
more than once per database cluster. For a more detailed discussion of what this number represents and what to do about
|
|||
|
it, please visit the page
|
|||
|
L<http://www.postgresql.org/docs/current/static/routine-vacuuming.html#VACUUM-FOR-WRAPAROUND>
|
|||
|
|
|||
|
The warning and critical values can have underscores in the number for legibility, as Perl does.
|
|||
|
|
|||
|
Example 1: Check the default values for the localhost database
|
|||
|
|
|||
|
check_postgres_txn_wraparound --host=localhost
|
|||
|
|
|||
|
Example 2: Check port 6000 and give a critical when 1.7 billion transactions are hit:
|
|||
|
|
|||
|
check_postgres_txn_wraparound --port=6000 --critical=1_700_000_000
|
|||
|
|
|||
|
For MRTG output, returns the highest number of transactions for all databases on line one,
|
|||
|
while line 4 indicates which database it is.
|
|||
|
|
|||
|
=head2 B<version>
|
|||
|
|
|||
|
(C<symlink: check_postgres_version>) Checks that the required version of Postgres is running. The
|
|||
|
I<--warning> and I<--critical> options (only one is required) must be of
|
|||
|
the format B<X.Y> or B<X.Y.Z> where B<X> is the major version number,
|
|||
|
B<Y> is the minor version number, and B<Z> is the revision.
|
|||
|
|
|||
|
Example 1: Give a warning if the database on port 5678 is not version 8.4.10:
|
|||
|
|
|||
|
check_postgres_version --port=5678 -w=8.4.10
|
|||
|
|
|||
|
Example 2: Give a critical if any database on hosts valley, grain, or sunshine is not 8.3:
|
|||
|
|
|||
|
check_postgres_version -H valley,grain,sunshine --critical=8.3
|
|||
|
|
|||
|
For MRTG output, reports a 1 or a 0 indicating success or failure on the first line. The
|
|||
|
fourth line indicates the current version. The version must be provided via the C<--mrtg> option.
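
For example (illustrative), to check in MRTG mode that the server on port 5678 is running 8.4.10:

  check_postgres_version --port=5678 --output=mrtg --mrtg=8.4.10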
|
|||
|
|
|||
|
=head2 B<wal_files>
|
|||
|
|
|||
|
(C<symlink: check_postgres_wal_files>) Checks how many WAL files exist in the F<pg_xlog> directory, which is found
|
|||
|
off of your B<data_directory>, sometimes as a symlink to another physical disk for
|
|||
|
performance reasons. This action must be run as a superuser, in order to access the
|
|||
|
contents of the F<pg_xlog> directory. The minimum version to use this action is
|
|||
|
Postgres 8.1. The I<--warning> and I<--critical> options are simply the number of
|
|||
|
files in the F<pg_xlog> directory. What number to set this to will vary, but a general
|
|||
|
guideline is to put a number slightly higher than what is normally there, to catch
|
|||
|
problems early.
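
For example (the thresholds are illustrative, not recommendations), a warning and a critical
level can both be set slightly above the count normally seen:

  check_postgres_wal_files --host=pluto --warning=15 --critical=20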
|
|||
|
|
|||
|
Normally, WAL files are closed and then re-used, but a long-running open
|
|||
|
transaction, or a faulty B<archive_command> script, may cause Postgres to
|
|||
|
create too many files. Ultimately, this will cause the disk they are on to run
|
|||
|
out of space, at which point Postgres will shut down.
|
|||
|
|
|||
|
Example 1: Check that the number of WAL files is 20 or less on host "pluto"
|
|||
|
|
|||
|
check_postgres_wal_files --host=pluto --critical=20
|
|||
|
|
|||
|
For MRTG output, reports the number of WAL files on line 1.
|
|||
|
|
|||
|
=head2 B<rebuild_symlinks>
|
|||
|
|
|||
|
=head2 B<rebuild_symlinks_force>
|
|||
|
|
|||
|
This action requires no other arguments, and does not connect to any databases,
|
|||
|
but simply creates symlinks in the current directory for each action, in the form
|
|||
|
B<check_postgres_E<lt>action_nameE<gt>>.
|
|||
|
If the file already exists, it will not be overwritten. If the action is rebuild_symlinks_force,
|
|||
|
then symlinks will be overwritten. The option --symlinks is a shorter way of saying
|
|||
|
--action=rebuild_symlinks
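
For example (assuming the script is run from its own directory), either of the following will
create the symlinks, the second overwriting any that already exist:

  perl check_postgres.pl --symlinks
  perl check_postgres.pl --action=rebuild_symlinks_force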
|
|||
|
|
|||
|
=head1 BASIC FILTERING
|
|||
|
|
|||
|
The options I<--include> and I<--exclude> can be combined to limit which
|
|||
|
things are checked, depending on the action. The name of the database can
|
|||
|
be filtered when using the following actions:
|
|||
|
backends, database_size, locks, query_time, txn_idle, and txn_time.
|
|||
|
The name of a relation can be filtered when using the following actions:
|
|||
|
bloat, index_size, table_size, relation_size, last_vacuum, last_autovacuum,
|
|||
|
last_analyze, and last_autoanalyze.
|
|||
|
The name of a setting can be filtered when using the settings_checksum action.
|
|||
|
The name of a file system can be filtered when using the disk_space action.
|
|||
|
|
|||
|
If only an include option is given, then ONLY those entries that match will be
|
|||
|
checked. However, if given both exclude and include, the exclusion is done first,
|
|||
|
and the inclusion after, to reinstate things that may have been excluded. Both
|
|||
|
I<--include> and I<--exclude> can be given multiple times,
|
|||
|
and/or as comma-separated lists. A leading tilde will match the following word
|
|||
|
as a regular expression.
|
|||
|
|
|||
|
To match a schema, end the search term with a single period. Leading tildes can
|
|||
|
be used for schemas as well.
|
|||
|
|
|||
|
Be careful when using filtering: an inclusion rule on the backends, for example,
|
|||
|
may report no problems not only because the matching database had no backends,
|
|||
|
but because you misspelled the name of the database!
|
|||
|
|
|||
|
Examples:
|
|||
|
|
|||
|
Only checks items named pg_class:
|
|||
|
|
|||
|
--include=pg_class
|
|||
|
|
|||
|
Only checks items containing the letters 'pg_':
|
|||
|
|
|||
|
--include=~pg_
|
|||
|
|
|||
|
Only check items beginning with 'pg_':
|
|||
|
|
|||
|
--include=~^pg_
|
|||
|
|
|||
|
Exclude the item named 'test':
|
|||
|
|
|||
|
--exclude=test
|
|||
|
|
|||
|
Exclude all items containing the letters 'test':
|
|||
|
|
|||
|
--exclude=~test
|
|||
|
|
|||
|
Exclude all items in the schema 'pg_catalog':
|
|||
|
|
|||
|
--exclude='pg_catalog.'
|
|||
|
|
|||
|
Exclude all items containing the letters 'ace', but allow the item 'faceoff':
|
|||
|
|
|||
|
--exclude=~ace --include=faceoff
|
|||
|
|
|||
|
Exclude all items which start with the letters 'pg_', which contain the letters 'slon',
|
|||
|
or which are named 'sql_settings' or 'green'. Specifically check items with the letters 'prod' in their names, and always check the item named 'pg_relname':
|
|||
|
|
|||
|
--exclude=~^pg_,~slon,sql_settings --exclude=green --include=~prod,pg_relname
|
|||
|
|
|||
|
=head1 USER NAME FILTERING
|
|||
|
|
|||
|
The options I<--includeuser> and I<--excludeuser> can be used on some actions
|
|||
|
to only examine database objects owned by (or not owned by) one or more users.
|
|||
|
An I<--includeuser> option always trumps an I<--excludeuser> option. You can
|
|||
|
give each option more than once for multiple users, or you can give a
|
|||
|
comma-separated list. The actions that currently use these options are:
|
|||
|
|
|||
|
=over 4
|
|||
|
|
|||
|
=item database_size
|
|||
|
|
|||
|
=item last_analyze
|
|||
|
|
|||
|
=item last_autoanalyze
|
|||
|
|
|||
|
=item last_vacuum
|
|||
|
|
|||
|
=item last_autovacuum
|
|||
|
|
|||
|
=item query_time
|
|||
|
|
|||
|
=item relation_size
|
|||
|
|
|||
|
=item txn_time
|
|||
|
|
|||
|
=back
|
|||
|
|
|||
|
Examples:
|
|||
|
|
|||
|
Only check items owned by the user named greg:
|
|||
|
|
|||
|
--includeuser=greg
|
|||
|
|
|||
|
Only check items owned by either watson or crick:
|
|||
|
|
|||
|
--includeuser=watson,crick
|
|||
|
|
|||
|
Only check items owned by crick, franklin, watson, or wilkins:
|
|||
|
|
|||
|
--includeuser=watson --includeuser=franklin --includeuser=crick,wilkins
|
|||
|
|
|||
|
Check all items except for those belonging to the user scott:
|
|||
|
|
|||
|
--excludeuser=scott
|
|||
|
|
|||
|
=head1 TEST MODE
|
|||
|
|
|||
|
To help in setting things up, this program can be run in a "test mode" by
|
|||
|
specifying the I<--test> option. This will perform some basic tests to
|
|||
|
make sure that the databases can be contacted, and that certain per-action
|
|||
|
prerequisites are met, such as whether the user is a superuser, if the version
|
|||
|
of Postgres is new enough, and if stats_row_level is enabled.
|
|||
|
|
|||
|
=head1 FILES
|
|||
|
|
|||
|
In addition to command-line configurations, you can put any options inside of a file. The file
|
|||
|
F<.check_postgresrc> in the current directory will be used if found. If not found, then the file
|
|||
|
F<~/.check_postgresrc> will be used. Finally, the file F</etc/check_postgresrc> will be used if available.
|
|||
|
The format of the file is option = value, one per line. Any line starting with a '#' will be skipped.
|
|||
|
Any values loaded from a check_postgresrc file will be overwritten by command-line options. All
|
|||
|
check_postgresrc files can be ignored by supplying a C<--no-checkpostgresrc> argument.
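
A small illustrative F<.check_postgresrc> (the host, port, and user shown are placeholders, not
recommendations) might look like this:

  # Default connection settings for check_postgres
  dbhost = dbhost.gtld
  dbport = 5432
  dbuser = nagios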
|
|||
|
|
|||
|
=head1 ENVIRONMENT VARIABLES
|
|||
|
|
|||
|
The environment variable I<$ENV{HOME}> is used to look for a F<.check_postgresrc> file.
|
|||
|
|
|||
|
=head1 TIPS AND TRICKS
|
|||
|
|
|||
|
Since this program uses the B<psql> program, make sure it is accessible to the
|
|||
|
user running the script. If run as a cronjob, this often means modifying the
|
|||
|
B<PATH> environment variable.
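
For example (the paths are illustrative), a crontab can set B<PATH> explicitly before the entry
that runs the script:

  PATH=/usr/local/pgsql/bin:/usr/bin:/bin
  */5 * * * * check_postgres.pl --action=backends --warning=80 --critical=95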
|
|||
|
|
|||
|
If you are using Nagios in embedded Perl mode, use the C<--action> argument
|
|||
|
instead of symlinks, so that the plugin only gets compiled one time.
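
For example (a sketch modeled on the definitions in the L</"NAGIOS EXAMPLES"> section below), a
single command definition per action can be used instead of a symlink:

  define command {
      command_name check_postgres_backends
      command_line $USER2$/check_postgres.pl -H $HOSTADDRESS$ -u pgsql -db postgres --action backends -w $ARG1$ -c $ARG2$
  }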
|
|||
|
|
|||
|
=head1 DEPENDENCIES
|
|||
|
|
|||
|
Access to a working version of psql, and the following very standard Perl modules:
|
|||
|
|
|||
|
=over 4
|
|||
|
|
|||
|
=item B<Cwd>
|
|||
|
|
|||
|
=item B<Getopt::Long>
|
|||
|
|
|||
|
=item B<File::Basename>
|
|||
|
|
|||
|
=item B<File::Temp>
|
|||
|
|
|||
|
=item B<Time::HiRes> (if C<$opt{showtime}> is set to true, which is the default)
|
|||
|
|
|||
|
=back
|
|||
|
|
|||
|
The L</settings_checksum> action requires the B<Digest::MD5> module.
|
|||
|
|
|||
|
The L</checkpoint> action requires the B<Date::Parse> module.
|
|||
|
|
|||
|
Some actions require access to external programs. If psql is not explicitly
|
|||
|
specified, the command B<C<which>> is used to find it. The program B<C</bin/df>>
|
|||
|
is needed by the L</disk_space> action.
|
|||
|
|
|||
|
=head1 DEVELOPMENT
|
|||
|
|
|||
|
Development happens using the git system. You can clone the latest version by doing:
|
|||
|
|
|||
|
git clone git://bucardo.org/check_postgres.git
|
|||
|
|
|||
|
=head1 MAILING LIST
|
|||
|
|
|||
|
Three mailing lists are available. For discussions about the program, bug reports,
|
|||
|
feature requests, and commit notices, send email to check_postgres@bucardo.org
|
|||
|
|
|||
|
https://mail.endcrypt.com/mailman/listinfo/check_postgres
|
|||
|
|
|||
|
A low-volume list for announcement of new versions and important notices is the
|
|||
|
'check_postgres-announce' list:
|
|||
|
|
|||
|
https://mail.endcrypt.com/mailman/listinfo/check_postgres-announce
|
|||
|
|
|||
|
Source code changes (via git-commit) are sent to the
|
|||
|
'check_postgres-commit' list:
|
|||
|
|
|||
|
https://mail.endcrypt.com/mailman/listinfo/check_postgres-commit
|
|||
|
|
|||
|
=head1 HISTORY
|
|||
|
|
|||
|
Items not specifically attributed are by GSM (Greg Sabino Mullane).
|
|||
|
|
|||
|
=over 4
|
|||
|
|
|||
|
=item B<Version 2.19.0> January 17, 2012
|
|||
|
|
|||
|
Add the --assume-prod option (Cédric Villemain)
|
|||
|
|
|||
|
Add the cluster_id check (Cédric Villemain)
|
|||
|
|
|||
|
Improve settings_checksum and checkpoint tests (Cédric Villemain)
|
|||
|
|
|||
|
Do not do an inner join to pg_user when checking database size
|
|||
|
(Greg Sabino Mullane; reported by Emmanuel Lesouef)
|
|||
|
|
|||
|
Use the full path when getting sequence information for same_schema.
|
|||
|
(Greg Sabino Mullane; reported by Cindy Wise)
|
|||
|
|
|||
|
Fix the formula for calculating xlog positions (Euler Taveira de Oliveira)
|
|||
|
|
|||
|
Better ordering of output for bloat check - make indexes as important
|
|||
|
as tables (Greg Sabino Mullane; reported by Jens Wilke)
|
|||
|
|
|||
|
Show the dbservice if it was used at top of same_schema output
|
|||
|
(Mike Blackwell)
|
|||
|
|
|||
|
Better installation paths (Greg Sabino Mullane, per bug 53)
|
|||
|
|
|||
|
=item B<Version 2.18.0> October 2, 2011
|
|||
|
|
|||
|
Redo the same_schema action. Use new --filter argument for all filtering.
|
|||
|
Allow comparisons between any number of databases.
|
|||
|
Remove the dbname2, dbport2, etc. arguments.
|
|||
|
Allow comparison of the same db over time.
|
|||
|
|
|||
|
Swap db1 and db2 if the slave is 1 for the hot standby check (David E. Wheeler)
|
|||
|
|
|||
|
Allow multiple --schema arguments for the slony_status action (GSM and Jehan-Guillaume de Rorthais)
|
|||
|
|
|||
|
Fix ORDER BY in the last vacuum/analyze action (Nicolas Thauvin)
|
|||
|
|
|||
|
Fix check_hot_standby_delay perfdata output (Nicolas Thauvin)
|
|||
|
|
|||
|
Look in the correct place for the .ready files with the archive_ready action (Nicolas Thauvin)
|
|||
|
|
|||
|
New action: commitratio (Guillaume Lelarge)
|
|||
|
|
|||
|
New action: hitratio (Guillaume Lelarge)
|
|||
|
|
|||
|
Make sure --action overrides the symlink naming trick.
|
|||
|
|
|||
|
Set defaults for archive_ready and wal_files (Thomas Guettler, GSM)
|
|||
|
|
|||
|
Better output for wal_files and archive_ready (GSM)
|
|||
|
|
|||
|
Fix warning when client_port set to empty string (bug #79)
|
|||
|
|
|||
|
Account for "empty row" in -x output (i.e. source of functions).
|
|||
|
|
|||
|
Fix some incorrectly named data fields (Andy Lester)
|
|||
|
|
|||
|
Expand the number of pgbouncer actions (Ruslan Kabalin)
|
|||
|
|
|||
|
Give detailed information and refactor txn_idle, txn_time, and query_time
|
|||
|
(Per request from bug #61)
|
|||
|
|
|||
|
Set maxalign to 8 in the bloat check if box identified as '64-bit'
|
|||
|
(Michel Sijmons, bug #66)
|
|||
|
|
|||
|
Support non-standard version strings in the bloat check.
|
|||
|
(Michel Sijmons and Gurjeet Singh, bug #66)
|
|||
|
|
|||
|
Do not show excluded databases in some output (Ruslan Kabalin)
|
|||
|
|
|||
|
Allow "and", "or" inside arguments (David E. Wheeler)
|
|||
|
|
|||
|
Add the "new_version_box" action.
|
|||
|
|
|||
|
Fix psql version regex (Peter Eisentraut, bug #69)
|
|||
|
|
|||
|
Add the --assume-standby-mode option (Ruslan Kabalin)
|
|||
|
|
|||
|
Note that txn_idle and query_time require 8.3 (Thomas Guettler)
|
|||
|
|
|||
|
Standardize and clean up all perfdata output (bug #52)
|
|||
|
|
|||
|
Exclude "idle in transaction" from the query_time check (bug #43)
|
|||
|
|
|||
|
Fix the perflimit for the bloat action (bug #50)
|
|||
|
|
|||
|
Clean up the custom_query action a bit.
|
|||
|
|
|||
|
Fix space in perfdata for hot_standby_delay action (Nicolas Thauvin)
|
|||
|
|
|||
|
Handle undef percents in check_fsm_relations (Andy Lester)
|
|||
|
|
|||
|
Fix typo in dbstats action (Stas Vitkovsky)
|
|||
|
|
|||
|
Fix MRTG for last vacuum and last_analyze actions.
|
|||
|
|
|||
|
=item B<Version 2.17.0> no public release
|
|||
|
|
|||
|
=item B<Version 2.16.0> January 20, 2011
|
|||
|
|
|||
|
Add new action 'hot_standby_delay' (Nicolas Thauvin)
|
|||
|
Add cache-busting for the version-grabbing utilities.
|
|||
|
Fix problem with going to next method for new_version_pg
|
|||
|
(Greg Sabino Mullane, reported by Hywel Mallett in bug #65)
|
|||
|
Allow /usr/local/etc as an alternative location for the
|
|||
|
check_postgresrc file (Hywel Mallett)
|
|||
|
Do not use tgisconstraint in same_schema if Postgres >= 9
|
|||
|
(Guillaume Lelarge)
|
|||
|
|
|||
|
=item B<Version 2.15.4> January 3, 2011
|
|||
|
|
|||
|
Fix warning when using symlinks
|
|||
|
(Greg Sabino Mullane, reported by Peter Eisentraut in bug #63)
|
|||
|
|
|||
|
=item B<Version 2.15.3> December 30, 2010
|
|||
|
|
|||
|
Show OK for no matching txn_idle entries.
|
|||
|
|
|||
|
=item B<Version 2.15.2> December 28, 2010
|
|||
|
|
|||
|
Better formatting of sizes in the bloat action output.
|
|||
|
|
|||
|
Remove duplicate perfs in bloat action output.
|
|||
|
|
|||
|
=item B<Version 2.15.1> December 27, 2010
|
|||
|
|
|||
|
Fix problem when examining items in pg_settings (Greg Sabino Mullane)
|
|||
|
|
|||
|
For connection test, return critical, not unknown, on FATAL errors
|
|||
|
(Greg Sabino Mullane, reported by Peter Eisentraut in bug #62)
|
|||
|
|
|||
|
=item B<Version 2.15.0> November 8, 2010
|
|||
|
|
|||
|
Add --quiet argument to suppress output on OK Nagios results
|
|||
|
Add index comparison for same_schema (Norman Yamada and Greg Sabino Mullane)
|
|||
|
Use $ENV{PGSERVICE} instead of "service=" to prevent problems (Guillaume Lelarge)
|
|||
|
Add --man option to show the entire manual. (Andy Lester)
|
|||
|
Redo the internal run_command() sub to use -x and hashes instead of regexes.
|
|||
|
Fix error in custom logic (Andreas Mager)
|
|||
|
Add the "pgbouncer_checksum" action (Guillaume Lelarge)
|
|||
|
Fix regex to work on WIN32 for check_fsm_relations and check_fsm_pages (Luke Koops)
|
|||
|
Don't apply a LIMIT when using --exclude on the bloat action (Marti Raudsepp)
|
|||
|
Change the output of query_time to show pid,user,port, and address (Giles Westwood)
|
|||
|
Fix to show database properly when using slony_status (Guillaume Lelarge)
|
|||
|
Allow warning items for same_schema to be comma-separated (Guillaume Lelarge)
|
|||
|
Constraint definitions across Postgres versions match better in same_schema.
|
|||
|
Work against "EnterpriseDB" databases (Sivakumar Krishnamurthy and Greg Sabino Mullane)
|
|||
|
Separate perfdata with spaces (Jehan-Guillaume (ioguix) de Rorthais)
|
|||
|
Add new action "archive_ready" (Jehan-Guillaume (ioguix) de Rorthais)
|
|||
|
|
|||
|
=item B<Version 2.14.3> (March 1, 2010)
|
|||
|
|
|||
|
Allow slony_status action to handle more than one slave.
|
|||
|
Use commas to separate function args in same_schema output (Robert Treat)
|
|||
|
|
|||
|
=item B<Version 2.14.2> (February 18, 2010)
|
|||
|
|
|||
|
Change autovac_freeze default warn/critical back to 90%/95% (Robert Treat)
|
|||
|
Put all items one-per-line for relation size actions if --verbose=1
|
|||
|
|
|||
|
=item B<Version 2.14.1> (February 17, 2010)
|
|||
|
|
|||
|
Don't use $^T in logfile check, as script may be long-running
|
|||
|
Change the error string for the logfile action for easier exclusion
|
|||
|
by programs like tail_n_mail
|
|||
|
|
|||
|
=item B<Version 2.14.0> (February 11, 2010)
|
|||
|
|
|||
|
Added the 'slony_status' action.
|
|||
|
Changed the logfile sleep from 0.5 to 1, as 0.5 gets rounded to 0 on some boxes!
|
|||
|
|
|||
|
=item B<Version 2.13.2> (February 4, 2010)
|
|||
|
|
|||
|
Allow timeout option to be used for logtime 'sleep' time.
|
|||
|
|
|||
|
=item B<Version 2.13.2> (February 4, 2010)
|
|||
|
|
|||
|
Show offending database for query_time action.
|
|||
|
Apply perflimit to main output for sequence action.
|
|||
|
Add 'noowner' option to same_schema action.
|
|||
|
Raise sleep timeout for logfile check to 15 seconds.
|
|||
|
|
|||
|
=item B<Version 2.13.1> (February 2, 2010)
|
|||
|
|
|||
|
Fix bug preventing column constraint differences from 2 > 1 for same_schema from being shown.
|
|||
|
Allow aliases 'dbname1', 'dbhost1', 'dbport1', etc.
|
|||
|
Added "nolanguage" as a filter for the same_schema option.
|
|||
|
Don't track "generic" table constraints (e.. $1, $2) using same_schema
|
|||
|
|
|||
|
=item B<Version 2.13.0> (January 29, 2010)
|
|||
|
|
|||
|
Allow "nofunctions" as a filter for the same_schema option.
|
|||
|
Added "noperm" as a filter for the same_schema option.
|
|||
|
Ignore dropped columns when considering positions for same_schema (Guillaume Lelarge)
|
|||
|
|
|||
|
=item B<Version 2.12.1> (December 3, 2009)
|
|||
|
|
|||
|
Change autovac_freeze default warn/critical from 90%/95% to 105%/120% (Marti Raudsepp)
|
|||
|
|
|||
|
=item B<Version 2.12.0> (December 3, 2009)
|
|||
|
|
|||
|
Allow the temporary directory to be specified via the "tempdir" argument,
|
|||
|
for systems that need it (e.g. /tmp is not owned by root).
|
|||
|
Fix so old versions of Postgres (< 8.0) use the correct default database (Giles Westwood)
|
|||
|
For "same_schema" trigger mismatches, show the attached table.
|
|||
|
Add the new_version_bc check for Bucardo version checking.
|
|||
|
Add database name to perf output for last_vacuum|analyze (Guillaume Lelarge)
|
|||
|
Fix for bloat action against old versions of Postgres without the 'block_size' param.
|
|||
|
|
|||
|
=item B<Version 2.11.1> (August 27, 2009)
|
|||
|
|
|||
|
Proper Nagios output for last_vacuum|analyze actions. (Cédric Villemain)
|
|||
|
Proper Nagios output for locks action. (Cédric Villemain)
|
|||
|
Proper Nagios output for txn_wraparound action. (Cédric Villemain)
|
|||
|
Fix for constraints with embedded newlines for same_schema.
|
|||
|
Allow --exclude for all items when using same_schema.
|
|||
|
|
|||
|
=item B<Version 2.11.0> (August 23, 2009)
|
|||
|
|
|||
|
Add Nagios perf output to the wal_files check (Cédric Villemain)
|
|||
|
Add support for .check_postgresrc, per request from Albe Laurenz.
|
|||
|
Allow list of web fetch methods to be changed with the --get_method option.
|
|||
|
Add support for the --language argument, which overrides any ENV.
|
|||
|
Add the --no-check_postgresrc flag.
|
|||
|
Ensure check_postgresrc options are completely overridden by command-line options.
|
|||
|
Fix incorrect warning > critical logic in replicate_rows (Glyn Astill)
|
|||
|
|
|||
|
=item B<Version 2.10.0> (August 3, 2009)
|
|||
|
|
|||
|
For same_schema, compare view definitions, and compare languages.
|
|||
|
Make script into a global executable via the Makefile.PL file.
|
|||
|
Better output when comparing two databases.
|
|||
|
Proper Nagios output syntax for autovac_freeze and backends checks (Cédric Villemain)
|
|||
|
|
|||
|
=item B<Version 2.9.5> (July 24, 2009)
|
|||
|
|
|||
|
Don't use a LIMIT in check_bloat if --include is used. Per complaint from Jeff Frost.
|
|||
|
|
|||
|
=item B<Version 2.9.4> (July 21, 2009)
|
|||
|
|
|||
|
More French translations (Guillaume Lelarge)
|
|||
|
|
|||
|
=item B<Version 2.9.3> (July 14, 2009)
|
|||
|
|
|||
|
Quote dbname in perf output for the backends check. (Davide Abrigo)
|
|||
|
Add 'fetch' as an alternative method for new_version checks, as this
|
|||
|
comes by default with FreeBSD. (Hywel Mallett)
|
|||
|
|
|||
|
=item B<Version 2.9.2> (July 12, 2009)
|
|||
|
|
|||
|
Allow dots and dashes in database name for the backends check (Davide Abrigo)
|
|||
|
Check and display the database for each match in the bloat check (Cédric Villemain)
|
|||
|
Handle 'too many connections' FATAL error in the backends check with a critical,
|
|||
|
rather than a generic error (Greg, idea by Jürgen Schulz-Brüssel)
|
|||
|
Do not allow perflimit to interfere with exclusion rules in the vacuum and
|
|||
|
analyze tests. (Greg, bug reported by Jeff Frost)
|
|||
|
|
|||
|
=item B<Version 2.9.1> (June 12, 2009)
|
|||
|
|
|||
|
Fix for multiple databases with the check_bloat action (Mark Kirkwood)
|
|||
|
Fixes and improvements to the same_schema action (Jeff Boes)
|
|||
|
Write tests for same_schema, other minor test fixes (Jeff Boes)
|
|||
|
|
|||
|
=item B<Version 2.9.0> (May 28, 2009)
|
|||
|
|
|||
|
Added the same_schema action (Greg)
|
|||
|
|
|||
|
=item B<Version 2.8.1> (May 15, 2009)
|
|||
|
|
|||
|
Added timeout via statement_timeout in addition to perl alarm (Greg)
|
|||
|
|
|||
|
=item B<Version 2.8.0> (May 4, 2009)
|
|||
|
|
|||
|
Added internationalization support (Greg)
|
|||
|
Added the 'disabled_triggers' check (Greg)
|
|||
|
Added the 'prepared_txns' check (Greg)
|
|||
|
Added the 'new_version_cp' and 'new_version_pg' checks (Greg)
|
|||
|
French translations (Guillaume Lelarge)
|
|||
|
Make the backends search return ok if no matches due to inclusion rules,
|
|||
|
per report by Guillaume Lelarge (Greg)
|
|||
|
Added comprehensive unit tests (Greg, Jeff Boes, Selena Deckelmann)
|
|||
|
Make fsm_pages and fsm_relations handle 8.4 servers smoothly. (Greg)
|
|||
|
Fix missing 'upd' field in show_dbstats (Andras Fabian)
|
|||
|
Allow ENV{PGCONTROLDATA} and ENV{PGBINDIR}. (Greg)
|
|||
|
Add various Perl module infrastructure (e.g. Makefile.PL) (Greg)
|
|||
|
Fix incorrect regex in txn_wraparound (Greg)
|
|||
|
For txn_wraparound: consistent ordering and fix duplicates in perf output (Andras Fabian)
|
|||
|
Add in missing exabyte regex check (Selena Deckelmann)
|
|||
|
Set stats to zero if we bail early due to USERWHERECLAUSE (Andras Fabian)
|
|||
|
Add additional items to dbstats output (Andras Fabian)
|
|||
|
Remove --schema option from the fsm_ checks. (Greg Mullane and Robert Treat)
|
|||
|
Handle case when ENV{PGUSER} is set. (Andy Lester)
|
|||
|
Many various fixes. (Jeff Boes)
|
|||
|
Fix --dbservice: check version and use ENV{PGSERVICE} for old versions (Cédric Villemain)
|
|||
|
|
|||
|
=item B<Version 2.7.3> (February 10, 2009)
|
|||
|
|
|||
|
Make the sequence action check if a sequence is being used for an int4 column and
react appropriately. (Michael Glaesemann)
|
|||
|
|
|||
|
=item B<Version 2.7.2> (February 9, 2009)
|
|||
|
|
|||
|
Fix to prevent multiple groupings if db arguments given.
|
|||
|
|
|||
|
=item B<Version 2.7.1> (February 6, 2009)
|
|||
|
|
|||
|
Allow the -p argument for port to work again.
|
|||
|
|
|||
|
=item B<Version 2.7.0> (February 4, 2009)
|
|||
|
|
|||
|
Do not require a connection argument, but use defaults and ENV variables when
|
|||
|
possible: PGHOST, PGPORT, PGUSER, PGDATABASE.
|
|||
|
|
|||
|
=item B<Version 2.6.1> (February 4, 2009)
|
|||
|
|
|||
|
Only require Date::Parse to be loaded if using the checkpoint action.
|
|||
|
|
|||
|
=item B<Version 2.6.0> (January 26, 2009)
|
|||
|
|
|||
|
Add the 'checkpoint' action.
|
|||
|
|
|||
|
=item B<Version 2.5.4> (January 7, 2009)
|
|||
|
|
|||
|
Better checking of $opt{dbservice} structure (Cédric Villemain)
|
|||
|
Fix time display in timesync action output (Selena Deckelmann)
|
|||
|
Fix documentation typos (Josh Tolley)
|
|||
|
|
|||
|
=item B<Version 2.5.3> (December 17, 2008)
|
|||
|
|
|||
|
Minor fix to regex in verify_version (Lee Jensen)
|
|||
|
|
|||
|
=item B<Version 2.5.2> (December 16, 2008)
|
|||
|
|
|||
|
Minor documentation tweak.
|
|||
|
|
|||
|
=item B<Version 2.5.1> (December 11, 2008)
|
|||
|
|
|||
|
Add support for --noidle flag to prevent backends action from counting idle processes.
|
|||
|
Patch by Selena Deckelmann.
|
|||
|
|
|||
|
Fix small undefined warning when not using --dbservice.
|
|||
|
|
|||
|
=item B<Version 2.5.0> (December 4, 2008)
|
|||
|
|
|||
|
Add support for the pg_service.conf file with the --dbservice option.
|
|||
|
|
|||
|
=item B<Version 2.4.3> (November 7, 2008)
|
|||
|
|
|||
|
Fix options for replicate_row action, per report from Jason Gordon.
|
|||
|
|
|||
|
=item B<Version 2.4.2> (November 6, 2008)
|
|||
|
|
|||
|
Wrap File::Temp::cleanup() calls in eval, in case File::Temp is an older version.
|
|||
|
Patch by Chris Butler.
|
|||
|
|
|||
|
=item B<Version 2.4.1> (November 5, 2008)
|
|||
|
|
|||
|
Cast numbers to numeric to support sequences ranges > bigint in check_sequence action.
|
|||
|
Thanks to Scott Marlowe for reporting this.
|
|||
|
|
|||
|
=item B<Version 2.4.0> (October 26, 2008)
|
|||
|
|
|||
|
Add Cacti support with the dbstats action.
|
|||
|
Pretty up the time output for last vacuum and analyze actions.
|
|||
|
Show the percentage of backends on the check_backends action.
|
|||
|
|
|||
|
=item B<Version 2.3.10> (October 23, 2008)
|
|||
|
|
|||
|
Fix minor warning in action check_bloat with multiple databases.
|
|||
|
Allow warning to be greater than critical when using the --reverse option.
|
|||
|
Support the --perflimit option for the check_sequence action.
|
|||
|
|
|||
|
=item B<Version 2.3.9> (October 23, 2008)
|
|||
|
|
|||
|
Minor tweak to way we store the default port.
|
|||
|
|
|||
|
=item B<Version 2.3.8> (October 21, 2008)
|
|||
|
|
|||
|
Allow the default port to be changed easily.
|
|||
|
Allow transform of simple output by MB, GB, etc.
|
|||
|
|
|||
|
=item B<Version 2.3.7> (October 14, 2008)
|
|||
|
|
|||
|
Allow multiple databases in 'sequence' action. Reported by Christoph Zwerschke.
|
|||
|
|
|||
|
=item B<Version 2.3.6> (October 13, 2008)
|
|||
|
|
|||
|
Add missing $schema to check_fsm_pages. (Robert Treat)
|
|||
|
|
|||
|
=item B<Version 2.3.5> (October 9, 2008)
|
|||
|
|
|||
|
Change option 'checktype' to 'valtype' to prevent collisions with -c[ritical]
|
|||
|
Better handling of errors.
|
|||
|
|
|||
|
=item B<Version 2.3.4> (October 9, 2008)
|
|||
|
|
|||
|
Do explicit cleanups of the temp directory, per problems reported by sb@nnx.com.
|
|||
|
|
|||
|
=item B<Version 2.3.3> (October 8, 2008)
|
|||
|
|
|||
|
Account for cases where some rounding queries give -0 instead of 0.
|
|||
|
Thanks to Glyn Astill for helping to track this down.
|
|||
|
|
|||
|
=item B<Version 2.3.2> (October 8, 2008)
|
|||
|
|
|||
|
Always quote identifiers in check_replicate_row action.
|
|||
|
|
|||
|
=item B<Version 2.3.1> (October 7, 2008)
|
|||
|
|
|||
|
Give a better error if one of the databases cannot be reached.
|
|||
|
|
|||
|
=item B<Version 2.3.0> (October 4, 2008)
|
|||
|
|
|||
|
Add the "sequence" action, thanks to Gavin M. Roy for the idea.
|
|||
|
Fix minor problem with autovac_freeze action when using MRTG output.
|
|||
|
Allow output argument to be case-insensitive.
|
|||
|
Documentation fixes.
|
|||
|
|
|||
|
=item B<Version 2.2.4> (October 3, 2008)
|
|||
|
|
|||
|
Fix some minor typos
|
|||
|
|
|||
|
=item B<Version 2.2.3> (October 1, 2008)
|
|||
|
|
|||
|
Expand range of allowed names for --repinfo argument (Glyn Astill)
|
|||
|
Documentation tweaks.
|
|||
|
|
|||
|
=item B<Version 2.2.2> (September 30, 2008)
|
|||
|
|
|||
|
Fixes for minor output and scoping problems.
|
|||
|
|
|||
|
=item B<Version 2.2.1> (September 28, 2008)
|
|||
|
|
|||
|
Add MRTG output to fsm_pages and fsm_relations.
|
|||
|
Force error messages to one-line for proper Nagios output.
|
|||
|
Check for invalid prereqs on failed command. From conversations with Euler Taveira de Oliveira.
|
|||
|
Tweak the fsm_pages formula a little.
|
|||
|
|
|||
|
=item B<Version 2.2.0> (September 25, 2008)
|
|||
|
|
|||
|
Add fsm_pages and fsm_relations actions. (Robert Treat)
|
|||
|
|
|||
|
=item B<Version 2.1.4> (September 22, 2008)
|
|||
|
|
|||
|
Fix for race condition in txn_time action.
|
|||
|
Add --debugoutput option.
|
|||
|
|
|||
|
=item B<Version 2.1.3> (September 22, 2008)
|
|||
|
|
|||
|
Allow alternate arguments "dbhost" for "host" and "dbport" for "port".
|
|||
|
Output a zero as default value for second line of MRTG output.
|
|||
|
|
|||
|
=item B<Version 2.1.2> (July 28, 2008)
|
|||
|
|
|||
|
Fix sorting error in the "disk_space" action for non-Nagios output.
|
|||
|
Allow --simple as a shortcut for --output=simple.
|
|||
|
|
|||
|
=item B<Version 2.1.1> (July 22, 2008)
|
|||
|
|
|||
|
Don't check databases with datallowconn false for the "autovac_freeze" action.
|
|||
|
|
|||
|
=item B<Version 2.1.0> (July 18, 2008)
|
|||
|
|
|||
|
Add the "autovac_freeze" action, thanks to Robert Treat for the idea and design.
|
|||
|
Put an ORDER BY on the "txn_wraparound" action.
|
|||
|
|
|||
|
=item B<Version 2.0.1> (July 16, 2008)
|
|||
|
|
|||
|
Optimizations to speed up the "bloat" action quite a bit.
|
|||
|
Fix "version" action to not always output in mrtg mode.
|
|||
|
|
|||
|
=item B<Version 2.0.0> (July 15, 2008)
|
|||
|
|
|||
|
Add support for MRTG and "simple" output options.
|
|||
|
Many small improvements to nearly all actions.
|
|||
|
|
|||
|
=item B<Version 1.9.1> (June 24, 2008)
|
|||
|
|
|||
|
Fix an error in the bloat SQL in 1.9.0
|
|||
|
Allow percentage arguments to be over 99%
|
|||
|
Allow percentages in the bloat --warning and --critical (thanks to Robert Treat for the idea)
|
|||
|
|
|||
|
=item B<Version 1.9.0> (June 22, 2008)
|
|||
|
|
|||
|
Don't include information_schema in certain checks. (Jeff Frost)
|
|||
|
Allow --include and --exclude to use schemas by using a trailing period.
|
|||
|
|
|||
|
=item B<Version 1.8.5> (June 22, 2008)
|
|||
|
|
|||
|
Output schema name before table name where appropriate.
|
|||
|
Thanks to Jeff Frost.
|
|||
|
|
|||
|
=item B<Version 1.8.4> (June 19, 2008)
|
|||
|
|
|||
|
Better detection of problems in --replicate_row.
|
|||
|
|
|||
|
=item B<Version 1.8.3> (June 18, 2008)
|
|||
|
|
|||
|
Fix 'backends' action: there may be no rows in pg_stat_activity, so run a second
|
|||
|
query if needed to find the max_connections setting.
|
|||
|
Thanks to Jeff Frost for the bug report.
|
|||
|
|
|||
|
=item B<Version 1.8.2> (June 10, 2008)
|
|||
|
|
|||
|
Changes to allow working under Nagios' embedded Perl mode. (Ioannis Tambouras)
|
|||
|
|
|||
|
=item B<Version 1.8.1> (June 9, 2008)
|
|||
|
|
|||
|
Allow 'bloat' action to work on Postgres version 8.0.
|
|||
|
Allow for different commands to be run for each action depending on the server version.
|
|||
|
Give better warnings when running actions not available on older Postgres servers.
|
|||
|
|
|||
|
=item B<Version 1.8.0> (June 3, 2008)
|
|||
|
|
|||
|
Add the --reverse option to the custom_query action.
|
|||
|
|
|||
|
=item B<Version 1.7.1> (June 2, 2008)
|
|||
|
|
|||
|
Fix 'query_time' action: account for race condition in which zero rows appear in pg_stat_activity.
|
|||
|
Thanks to Dustin Black for the bug report.
|
|||
|
|
|||
|
=item B<Version 1.7.0> (May 11, 2008)
|
|||
|
|
|||
|
Add --replicate_row action
|
|||
|
|
|||
|
=item B<Version 1.6.1> (May 11, 2008)
|
|||
|
|
|||
|
Add --symlinks option as a shortcut to --action=rebuild_symlinks
|
|||
|
|
|||
|
=item B<Version 1.6.0> (May 11, 2008)
|
|||
|
|
|||
|
Add the custom_query action.
|
|||
|
|
|||
|
=item B<Version 1.5.2> (May 2, 2008)
|
|||
|
|
|||
|
Fix problem with too eager creation of custom pgpass file.
|
|||
|
|
|||
|
=item B<Version 1.5.1> (April 17, 2008)
|
|||
|
|
|||
|
Add example Nagios configuration settings (Brian A. Seklecki)
|
|||
|
|
|||
|
=item B<Version 1.5.0> (April 16, 2008)
|
|||
|
|
|||
|
Add the --includeuser and --excludeuser options. Documentation cleanup.
|
|||
|
|
|||
|
=item B<Version 1.4.3> (April 16, 2008)
|
|||
|
|
|||
|
Add in the 'output' concept for future support of non-Nagios programs.
|
|||
|
|
|||
|
=item B<Version 1.4.2> (April 8, 2008)
|
|||
|
|
|||
|
Fix bug preventing --dbpass argument from working (Robert Treat).
|
|||
|
|
|||
|
=item B<Version 1.4.1> (April 4, 2008)
|
|||
|
|
|||
|
Minor documentation fixes.
|
|||
|
|
|||
|
=item B<Version 1.4.0> (April 2, 2008)
|
|||
|
|
|||
|
Have 'wal_files' action use pg_ls_dir (idea by Robert Treat).
|
|||
|
For last_vacuum and last_analyze, respect autovacuum effects, add separate
|
|||
|
autovacuum checks (ideas by Robert Treat).
|
|||
|
|
|||
|
=item B<Version 1.3.1> (April 2, 2008)
|
|||
|
|
|||
|
Have txn_idle use query_start, not xact_start.
|
|||
|
|
|||
|
=item B<Version 1.3.0> (March 23, 2008)
|
|||
|
|
|||
|
Add in txn_idle and txn_time actions.
|
|||
|
|
|||
|
=item B<Version 1.2.0> (February 21, 2008)
|
|||
|
|
|||
|
Add the 'wal_files' action, which counts the number of WAL files
|
|||
|
in your pg_xlog directory.
|
|||
|
Fix some typos in the docs.
|
|||
|
Explicitly allow -v as an argument.
|
|||
|
Allow for a null syslog_facility in the 'logfile' action.
|
|||
|
|
|||
|
=item B<Version 1.1.2> (February 5, 2008)
|
|||
|
|
|||
|
Fix error preventing --action=rebuild_symlinks from working.
|
|||
|
|
|||
|
=item B<Version 1.1.1> (February 3, 2008)
|
|||
|
|
|||
|
Switch vacuum and analyze date output to use 'DD', not 'D'. (Glyn Astill)
|
|||
|
|
|||
|
=item B<Version 1.1.0> (December 16, 2007)
|
|||
|
|
|||
|
Fixes, enhancements, and performance tracking.
|
|||
|
Add performance data tracking via --showperf and --perflimit
|
|||
|
Lots of refactoring and cleanup of how actions handle arguments.
|
|||
|
Do basic checks to figure out syslog file for 'logfile' action.
|
|||
|
Allow for exact matching of beta versions with 'version' action.
|
|||
|
Redo the default arguments to only populate when neither 'warning' nor 'critical' is provided.
|
|||
|
Allow just warning OR critical to be given for the 'timesync' action.
|
|||
|
Remove 'redirect_stderr' requirement from 'logfile' due to 8.3 changes.
|
|||
|
Actions 'last_vacuum' and 'last_analyze' are 8.2 only (Robert Treat)
|
|||
|
|
|||
|
=item B<Version 1.0.16> (December 7, 2007)
|
|||
|
|
|||
|
First public release, December 2007
|
|||
|
|
|||
|
=back
|
|||
|
|
|||
|
=head1 BUGS AND LIMITATIONS
|
|||
|
|
|||
|
The index bloat size optimization is rough.
|
|||
|
|
|||
|
Some actions may not work on older versions of Postgres (before 8.0).
|
|||
|
|
|||
|
Please report any problems to check_postgres@bucardo.org
|
|||
|
|
|||
|
=head1 AUTHOR
|
|||
|
|
|||
|
Greg Sabino Mullane <greg@endpoint.com>
|
|||
|
|
|||
|
|
|||
|
=head1 NAGIOS EXAMPLES
|
|||
|
|
|||
|
Some example Nagios configuration settings using this script:
|
|||
|
|
|||
|
define command {
|
|||
|
command_name check_postgres_size
|
|||
|
command_line $USER2$/check_postgres.pl -H $HOSTADDRESS$ -u pgsql -db postgres --action database_size -w $ARG1$ -c $ARG2$
|
|||
|
}
|
|||
|
|
|||
|
define command {
|
|||
|
command_name check_postgres_locks
|
|||
|
command_line $USER2$/check_postgres.pl -H $HOSTADDRESS$ -u pgsql -db postgres --action locks -w $ARG1$ -c $ARG2$
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
define service {
|
|||
|
use generic-other
|
|||
|
host_name dbhost.gtld
|
|||
|
service_description dbhost PostgreSQL Service Database Usage Size
|
|||
|
check_command check_postgres_size!256000000!512000000
|
|||
|
}
|
|||
|
|
|||
|
define service {
|
|||
|
use generic-other
|
|||
|
host_name dbhost.gtld
|
|||
|
service_description dbhost PostgreSQL Service Database Locks
|
|||
|
check_command check_postgres_locks!2!3
|
|||
|
}
|
|||
|
|
|||
|
=head1 LICENSE AND COPYRIGHT
|
|||
|
|
|||
|
Copyright (c) 2007-2012 Greg Sabino Mullane <greg@endpoint.com>.
|
|||
|
|
|||
|
Redistribution and use in source and binary forms, with or without
|
|||
|
modification, are permitted provided that the following conditions are met:
|
|||
|
|
|||
|
1. Redistributions of source code must retain the above copyright notice,
|
|||
|
this list of conditions and the following disclaimer.
|
|||
|
2. Redistributions in binary form must reproduce the above copyright notice,
|
|||
|
this list of conditions and the following disclaimer in the documentation
|
|||
|
and/or other materials provided with the distribution.
|
|||
|
|
|||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
|
|||
|
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
|||
|
EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
|||
|
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
|||
|
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
|||
|
OF SUCH DAMAGE.
|
|||
|
|
|||
|
=cut
|
|||
|
|
|||
|
# vi: tabstop=4 shiftwidth=4 expandtab
|