version 1.1.1.1, 2009/12/01 04:36:47
|
version 1.6, 2023/08/06 20:06:51
|
Line 1
|
Line 1
|
#!/bin/bash |
#!/bin/bash |
|
|
# canardien 0.0.2 |
# canardien 0.0.6 |
# (c) 2005 under GPL by Adrian Zaugg |
# (c) 2005-2021 under GPL by Adrian Zaugg |
|
|
# canardien pings ente.limmat.ch to determine whether she has gone diving. |
|
|
|
# ToDo: multi host, multi eMail |
# canardien [<host> [<host> ...]] |
|
|
|
# canardien pings a machine (originally ente.limmat.ch, hence its name) |
|
# to determine whether she has gone diving. |
|
|
|
|
## Settings |
## Settings |
# |
# |
|
|
# Host to ping. It's ente.limmat.ch |
# Hosts to ping. use a space separated list for multiple targets. All |
HOST="ente.limmat.ch" |
# hosts of the list are checked and their results reported individually. |
|
# Hosts given on the command line overwrite this setting. |
|
HOSTS="" |
|
|
# eMail Alerts |
# eMail Alerts |
# Send alert email messages to the following address(es). Leave empty |
# Send alert email messages to the following address(es). Leave empty |
# for no alert. For multiple destinations use a comma separated list. |
# for no alert. For multiple destinations use a comma separated list. |
ALERT_TO="" |
ALERT_TO="root" |
|
|
# Subject of alert email |
# Subject of alert email |
ALERT_SUBJECT="Achtung: $HOST antwortet nicht mehr!" |
ALERT_SUBJECT='"Attention: no answer from $HOST anymore!"' |
|
|
# Text of Message (Put in 'single quotes' to protect variables. They should get |
# Text of Message (Put variables in 'single quotes' to protect them. They |
# expanded at the time the message is sent!) |
# should get expanded at the time the message is sent!) |
ALERT_TEXT='"\n[$TIME_STAMP]\n\nALERT!!\n\n\t$HOST is down!\n\nYou should probably do something, please.\nKind regards, $PINGHOST."' |
ALERT_TEXT='"\n[$TIME_STAMP]\n\nALERT!!\n\n\t$HOST is down!\n\nYou should probably do something, please.\n\nKind regards, $PINGHOST."' |
|
|
# path to fping |
# Path to fping |
PING=`which fping` |
PING="" |
|
|
# answer of fping to reachable hosts |
# Answer of fping to reachable hosts |
ALIVE_ANSWER="is alive" |
ALIVE_ANSWER="is alive" |
|
|
|
# Max number of packets to send before giving up. Time increases exponentially, |
|
# use a number < 7. |
|
RETRIES=5 |
|
|
# Temporary file path |
# Temporary file path |
TMPDIR="/tmp" |
TMPDIR="/tmp" |
|
|
# This host's name |
# This host's name |
PINGHOST=`uname -n` |
PINGHOST="$(uname -n)" |
|
|
# set to an empty string to avoid debug output, |
# Set to an empty string to avoid debug output, |
# to "low" for a little output, and to anything else |
# to "low" for a little output, and to anything else |
# for verbose output |
# for verbose output |
DEBUG=loud |
DEBUG=verbose |
|
|
# for silent (non-error) operation set to anything, |
# For silent (non-error) operation set to anything, |
# comment out to enable text output |
# comment out to enable text output |
# (migration to use shout is in progress) |
#SILENT=shshsh |
SILENT=shshsh |
|
|
|
|
|
# -----------functions----------- |
# -----------functions----------- |
|
|
# Initialize: locate fping and settle the list of hosts to check.
#
# Globals read:    PING, HOSTS
# Globals written: PING  (filled in from the search path when empty)
#                  HOSTS (replaced by the arguments when any are given, then
#                         reduced to single-space separated words)
# Arguments:       host names; a non-empty argument list overrides HOSTS
# Exits with status 1 when fping cannot be found or executed, or when no
# host is left to ping.
function init() {
	local -a host_list
	local IFS=$' \t\n'

	# check fping existence
	if [ -z "$PING" ]; then
		# command -v is the shell builtin lookup; no dependency on `which`
		PING="$(command -v fping)"
	fi
	if [ -z "$PING" ]; then
		echo "Error: fping external program not found or not set. Exitting." >&2
		exit 1
	fi
	if [ ! -x "$PING" ]; then
		echo "Error: Can't execute the program set to use as fping. Please enter the correct path to \"fping\"." >&2
		exit 1
	fi

	# hosts given as arguments take precedence over the configured list
	if [ -n "$*" ]; then
		HOSTS="$*"
	fi

	# squeeze repeated blanks and trim both ends: splitting into words and
	# joining them again leaves exactly one space between host names
	read -r -a host_list <<< "$HOSTS"
	HOSTS="${host_list[*]}"

	if [ -z "$HOSTS" ]; then
		echo "Error: No host to ping." >&2
		exit 1
	fi
}
|
|
# If a host responds to pings, it is considered up.
#
# Runs fping against $HOST, records the verdict in UP and hands over to
# rememberstate, which tracks state changes and triggers alerts.
#
# Globals read:    PING, RETRIES, HOST, ALIVE_ANSWER, DEBUG
# Globals written: UP (set to "true" when the host answered, unset otherwise),
#                  PING_ANSWER, PING_ERRNUM, TIME_STAMP
# Returns:         1 when fping itself failed (exit status above 2); the host
#                  is then skipped and no state is recorded. Otherwise the
#                  status of rememberstate.
function checkconnection() {
	unset UP

	# ping host
	PING_ANSWER="$("$PING" -R -B 2 -r "$RETRIES" -p 50 "$HOST" 2>&1)"
	PING_ERRNUM=$?
	if [ "$PING_ERRNUM" -gt 2 ]; then
		# Report only the first line of what fping printed, then leave the
		# function: `continue` has no effect outside a loop and would let a
		# failed fping run be recorded as "host down".
		echo "Error: Got error $PING_ERRNUM from fping ${PING_ANSWER%%$'\n'*}. Disregarding $HOST." >&2
		return 1
	fi

	# parse answer
	if grep -q -- "$ALIVE_ANSWER" <<< "$PING_ANSWER"; then
		UP=true
	fi
	TIME_STAMP="$(date +"%a %e.%m.%y %H:%M:%S")"

	if [ -n "$UP" ]; then
		# the "up" message is shown for any debug level except "low"
		if [ -n "$DEBUG" ] && [ "$DEBUG" != "low" ]; then
			shout "The connection to $HOST is up."
		fi
	elif [ -n "$DEBUG" ]; then
		shout "The connection to $HOST is down."
	fi

	# remember a host's state
	rememberstate
}
|
|
|
|
|
# Remember recent state of host, trigger alert.
#
# A state file "$TMPDIR/.canardien-$PINGHOST-$HOST" exists exactly while the
# host is considered down; it holds the time stamp of the first failed check.
#
# Globals read: UP, TMPDIR, PINGHOST, HOST, TIME_STAMP, DEBUG
# Calls:        sendalert (on the down and the up-again transition), shout
function rememberstate() {
	local state_file="$TMPDIR/.canardien-$PINGHOST-$HOST"

	if [ -z "$UP" ]; then
		if [ ! -e "$state_file" ]; then
			# first failed check: store the time stamp and raise the alert
			printf '%s' "$TIME_STAMP" > "$state_file"
			sendalert
		elif [ -n "$DEBUG" ]; then
			shout "Down since $(cat "$state_file")."
		fi
	else
		# Host is up
		if [ -e "$state_file" ]; then
			if [ -n "$DEBUG" ]; then
				shout "$HOST is up again."
			fi
			# The recovery templates are local, so the configured "down"
			# templates survive this call; sendalert, being called from
			# here, still sees the local values. The text reads the state
			# file when it is expanded, so the file is removed only after
			# the alert went out.
			local ALERT_SUBJECT='"The host $HOST answers again!"'
			local ALERT_TEXT='"[$(date +"%a %e.%m.%y %H:%M:%S")]\n\n\n$HOST is up again.\n--------------------------------------\n(downtime began $(cat "$TMPDIR/.canardien-$PINGHOST-$HOST"))\n\n\n Kind regards, $PINGHOST."'
			sendalert
			# delete tmp file
			rm "$state_file"
		elif [ -n "$DEBUG" ] && [ "$DEBUG" != "low" ]; then
			# still running
			shout "$HOST is up."
		fi
	fi
}
|
|
|
|
# Send an email alert.
#
# ALERT_SUBJECT and ALERT_TEXT are templates: each holds a double-quoted
# string whose variables and command substitutions are expanded only now,
# at sending time. Backslash escapes in the text (\n, \t) are interpreted
# when the body is written to mail.
#
# Globals read: ALERT_TO, ALERT_SUBJECT, ALERT_TEXT, DEBUG (plus whatever the
#               templates reference)
# Does nothing when ALERT_TO is empty.
function sendalert() {
	local alert_subject MSG

	if [ -n "$ALERT_TO" ]; then
		# NOTE(review): eval executes the templates as shell code; they must
		# come from trusted configuration only. The argument is quoted so
		# the template reaches eval verbatim (no word splitting, no file
		# name expansion of characters such as * or [ ]).
		eval "alert_subject=\$$ALERT_SUBJECT"
		eval "MSG=\$$ALERT_TEXT"
		# send mail
		printf '%b' "$MSG" | mail -s "$alert_subject" "$ALERT_TO"
		if [ -n "$DEBUG" ]; then
			shout "Alert sent to $ALERT_TO."
		fi
	fi
}
|
|
|
|
# To avoid any output in case of a silent operation,
# shout instead of echo.
#
# Arguments: $1 - message; backslash escapes such as \n and \t are interpreted
# Globals read: SILENT - any non-empty value suppresses the output
function shout() {
	if [ -z "$SILENT" ]; then
		# printf never mistakes the message for an option (echo -e would
		# print nothing for a message like "-n")
		printf '%b\n' "$1"
	fi
}
|
|
|
|
# -----------main----------- |
# -----------main----------- |
|
|
# Settle fping and the host list first; nothing may ping before this ran.
init "$*"

# call an instance for each host, when multiple hosts are given
if [[ "$HOSTS" == *" "* ]]; then
	# init left exactly one space between host names, so turning each space
	# into a newline yields one host per line
	while read -r HOST; do
		# each host is checked by its own background instance of this script
		"$0" "$HOST" &
	done <<< "${HOSTS// /$'\n'}"
else
	# process a single host
	HOST="$HOSTS"
	checkconnection
fi

exit 0
|
|
|
# todo: Switches ( -q, --debug, ...) |
|
# grace period: do not send a mail if down time shorter than X minutes |
|
# repeat down info after x hours |
|
# test port instead of ping or in addition |
|
|