--- badi/public_scripts/canardien/canardien 2013/06/26 00:13:44 1.3 +++ badi/public_scripts/canardien/canardien 2023/08/06 20:06:51 1.6 @@ -1,17 +1,22 @@ #!/bin/bash -# canardien 0.0.3 -# (c) 2005 under GPL by Adrian Zaugg +# canardien 0.0.6 +# (c) 2005-2021 under GPL by Adrian Zaugg -# canardiens pings ente.limmat.ch to determine wether she is gone diving. -# ToDo: multi host, multi eMail +# canardien [ [ ...]] + +# canardiens pings a machine, originally ente.limmat.ch thus its name, +# to determine wether she is gone diving. + ## Settings # -# Host to ping. It's ente.limmat.ch -HOST="ente.limmat.ch" +# Hosts to ping. use a space separated list for multiple targets. All +# hosts of the list are checked and their results reported individually. +# Hosts given on the command line overwrite this setting. +HOSTS="" # eMail Alerts # Send alert email messages to the following address(es). Leave empty @@ -19,126 +24,181 @@ HOST="ente.limmat.ch" ALERT_TO="root" # Subject of alert email -ALERT_SUBJECT="Attention: no answer from $HOST anymore!" +ALERT_SUBJECT='"Attention: no answer from $HOST anymore!"' -# Text of Message (But in 'single quotes' to protect variables. They should get -# expanded at the time the message is sent!) +# Text of Message (Put variables in 'single quotes' to protect them. They +# should get expanded at the time the message is sent!) ALERT_TEXT='"\n[$TIME_STAMP]\n\nALERT!!\n\n\t$HOST is down!\n\nYou should probably do something, please.\n\nKind regards, $PINGHOST."' -# path to fping -PING=`which fping` +# Path to fping +PING="" -# answer of fping to reachable hosts +# Answer of fping to reachable hosts ALIVE_ANSWER="is alive" +# Max number of pakets to send before giving up. Time increases exponentially, +# use a number < 7. +RETRIES=5 + # Temporary file path TMPDIR="/tmp" # This hosts name -PINGHOST=`uname -n` +PINGHOST="$(uname -n)" -# set to an empty string to avoid debug output, +# Set to an empty string to avoid debug output, # to "low" for a few, output and to anything else # for verbose output -DEBUG=loud +DEBUG=verbose -# for silent (non-error) operation set to anything, +# For silent (non-error) operation set to anything, # comment out to enable text output -# (migration to use shout is in progress) -SILENT=shshsh +#SILENT=shshsh # -----------functions----------- -# If ente.limmat.ch responds to pings, she is considered up. -function checkconnection { - unset UP +# Initialize. +function init() { + + # check fping existence + if [ -z "$PING" ]; then + PING="$(which fping)" + fi if [ ${#PING} -eq 0 ]; then - echo "Error: fping external program not in path. Exitting." + echo "Error: fping external program not found or not set. Exitting." >&2 + exit 1 + fi + if [ ! -x "$PING" ]; then + echo "Error: Can't execute the program set to use as fping. Please enter the correct path to \"fping\"." >&2 exit 1 fi - PING_ANSWER="$($PING "$HOST")" - PING_ERRNUM=$? - if [ $PING_ERRNUM -gt 2 ]; then - echo "Error $PING_ERRNUM in fping $(head -1 "$PING_ANSWER") . Exitting." - exit 1 - fi - - if [ `echo "$PING_ANSWER" | grep -c "$ALIVE_ANSWER"` -gt 0 ]; then - UP=true - fi - TIME_STAMP=`date +"%a %e.%m.%y %H:%M:%S"` - if [ -n "$UP" ]; then - if [ ! "$DEBUG" = "low" ] && [ -n "$DEBUG" ]; then - shout "The connection is up." - fi - else - if [ -n "$DEBUG" ]; then - shout "The connection is down." - fi - fi + # get hostset + if [ -n "$*" ]; then + HOSTS="$*" + fi + + # remove multiple spaces and trim + HOSTS="$(echo "$HOSTS" | sed -e "s/ \{1,\}/ /g" -e "s/^ //" -e "s/ $//")" + + if [ -z "$HOSTS" ]; then + echo "Error: No host to ping." >&2 + exit 1 + fi } -# Rember recent state of ente.limmat.ch, trigger alert -function rememberstate { - # Put a simple time stamp in a tmp file, when host gets down - if [ -z "$UP" ]; then - if [ ! -e "$TMPDIR/.canardien-$PINGHOST-$HOST" ]; then - # set time stamp - echo -ne "$TIME_STAMP" > "$TMPDIR/.canardien-$PINGHOST-$HOST" - # send alert - sendalert - elif [ -n "$DEBUG" ]; then - shout "Down since `cat $TMPDIR/.canardien-$PINGHOST-$HOST`." - fi - else - # Host is up - if [ -e "$TMPDIR/.canardien-$PINGHOST-$HOST" ]; then - if [ -n "$DEBUG" ]; then - shout "$HOST is up again." - fi - # send alert - ALERT_SUBJECT="The host $HOST answers again!!" - ALERT_TEXT='"[$(date +"%a %e.%m.%y %H:%M:%S")]\n\n\n$HOST is up again.\n--------------------------------------\n(downtime began $(cat "$TMPDIR/.canardien-$PINGHOST-$HOST"))\n\n\n Kind regards, $PINGHOST."' - sendalert - # delete tmp file - rm "$TMPDIR/.canardien-$PINGHOST-$HOST" - else - # still running - if [ ! "$DEBUG" = "low" ] && [ -n "$DEBUG" ]; then - shout "$HOST is still up." - fi - fi - fi + +# If a host responds to pings, it is considered up. +function checkconnection() { + + unset UP + + # ping host + PING_ANSWER="$($PING -R -B 2 -r $RETRIES -p 50 "$HOST" 2>&1)" + PING_ERRNUM=$? + if [ $PING_ERRNUM -gt 2 ]; then + echo "Error: Got error $PING_ERRNUM from fping $(head -1 "$PING_ANSWER"). Disregarding $HOST." >&2 + continue; + fi + + # parse answer + if [ $(echo "$PING_ANSWER" | grep -c "$ALIVE_ANSWER") -gt 0 ]; then + UP=true + fi + TIME_STAMP="$(date +"%a %e.%m.%y %H:%M:%S")" + if [ -n "$UP" ]; then + if [ ! "$DEBUG" = "low" ] && [ -n "$DEBUG" ]; then + shout "The connection to $HOST is up." + fi + else + if [ -n "$DEBUG" ]; then + shout "The connection to $HOST is down." + fi + fi + + # remember a hosts state + rememberstate +} + + +# Rember recent state of host, trigger alert +function rememberstate() { + # Put a simple time stamp in a tmp file, when host is detected as down for the first time + if [ -z "$UP" ]; then + if [ ! -e "$TMPDIR/.canardien-$PINGHOST-$HOST" ]; then + # set time stamp + echo -ne "$TIME_STAMP" > "$TMPDIR/.canardien-$PINGHOST-$HOST" + # send alert + sendalert + elif [ -n "$DEBUG" ]; then + shout "Down since `cat $TMPDIR/.canardien-$PINGHOST-$HOST`." + fi + else + # Host is up + if [ -e "$TMPDIR/.canardien-$PINGHOST-$HOST" ]; then + if [ -n "$DEBUG" ]; then + shout "$HOST is up again." + fi + # send alert + ALERT_SUBJECT='"The host $HOST answers again!"' + ALERT_TEXT='"[$(date +"%a %e.%m.%y %H:%M:%S")]\n\n\n$HOST is up again.\n--------------------------------------\n(downtime began $(cat "$TMPDIR/.canardien-$PINGHOST-$HOST"))\n\n\n Kind regards, $PINGHOST."' + sendalert + # delete tmp file + rm "$TMPDIR/.canardien-$PINGHOST-$HOST" + else + # still running + if [ ! "$DEBUG" = "low" ] && [ -n "$DEBUG" ]; then + shout "$HOST is up." + fi + fi + fi } + # Send an email alert -function sendalert { - # send mail +function sendalert() { + # send mail if [ ! -z "$ALERT_TO" ]; then - eval MSG=\$$ALERT_TEXT - echo -e -n "$MSG" | mail -s "$ALERT_SUBJECT" "$ALERT_TO" - if [ -n "$DEBUG" ]; then - shout "Alert sent to $ALERT_TO." - fi + eval alert_subject=\$$ALERT_SUBJECT + eval MSG=\$$ALERT_TEXT + echo -e -n "$MSG" | mail -s "$alert_subject" "$ALERT_TO" + if [ -n "$DEBUG" ]; then + shout "Alert sent to $ALERT_TO." + fi fi } + # To avoid any output in case of a silent operation, # shout instead of echo. -function shout() -{ - if [ -z "$SILENT" ]; then - echo -e "$1" - fi +function shout() { + if [ -z "$SILENT" ]; then + echo -e "$1" + fi } # -----------main----------- -checkconnection -rememberstate +init "$*" + +# call an instance for each host, when multiple hosts are given +if [[ "$HOSTS" =~ " " ]]; then + while read -r HOST; do + "$0" "$HOST" & + done <<< "$(echo "$HOSTS" | tr ' ' '\n')" + exit 0 +else + # process a single host + HOST="$HOSTS" + checkconnection +fi + exit 0 +# todo: Switches ( -q, --debug, ...) +# grace period: do not send a mail if down time shorter than X minutes +# repeat down info after x hours +# test port instead of ping or in addition