#!/bin/bash

# canardien 0.0.4
# (c) 2005-2019 under GPL by Adrian Zaugg

# Usage: canardien [HOST [HOST ...]]

# canardien pings a machine, originally ente.limmat.ch (hence its name),
# to determine whether she is gone diving.


## Settings
#
# Hosts to ping. Use a space separated list for multiple targets. All
# hosts of the list are checked and their results reported individually.
# Hosts given on the command line overwrite this setting.
HOSTS=""

# eMail Alerts
# Send alert email messages to the following address(es). Leave empty
# to send no alert mails.
ALERT_TO="root"

# Subject of alert email. Keep the outer 'single quotes' and the inner
# "double quotes": the variables must only get expanded at the time the
# message is sent.
ALERT_SUBJECT='"Attention: no answer from $HOST anymore!"'

# Text of Message (Put variables in 'single quotes' to protect them. They
# should get expanded at the time the message is sent!)
ALERT_TEXT='"\n[$TIME_STAMP]\n\nALERT!!\n\n\t$HOST is down!\n\nYou should probably do something, please.\n\nKind regards, $PINGHOST."'

# Path to fping. Leave empty to have it looked up in the PATH.
PING=""

# Answer of fping to reachable hosts
ALIVE_ANSWER="is alive"

# Max number of packets to send before giving up. Time increases exponentially,
# use a number < 7.
RETRIES=4

# Temporary file path
TMPDIR="/tmp"

# This hosts name
PINGHOST="$(uname -n)"

# Set to an empty string to avoid debug output,
# to "low" for a few output and to anything else
# for verbose output
DEBUG=verbose

# For silent (non-error) operation set to anything,
# comment out to enable text output
#SILENT=shshsh


# -----------functions-----------

# Initialize: locate fping and determine the list of hosts to check.
#
# Globals:   PING  (read/written) - path to fping; looked up in the PATH
#                                   when empty
#            HOSTS (read/written) - replaced by the arguments, if any
# Arguments: host names; when given they overwrite the HOSTS setting
# Exits:     with status 1 when fping is missing or not executable, or
#            when there is no host to ping
function init() {

	# check fping existence
	if [[ -z "$PING" ]]; then
		# command -v is the shell builtin way to search the PATH
		PING="$(command -v fping)"
	fi
	if [[ -z "$PING" ]]; then
		echo "Error: fping external program not found or not set. Exitting." >&2
		exit 1
	fi
	if [[ ! -x "$PING" ]]; then
		echo "Error: Can't execute the program set to use as fping. Please enter the correct path to \"fping\"." >&2
		exit 1
	fi

	# get hostset; "$*" joins all arguments into one space separated list
	if [[ -n "$*" ]]; then
		HOSTS="$*"
	fi
	if [[ -z "$HOSTS" ]]; then
		echo "Error: No host to ping." >&2
		# an error has to be visible in the exit status, too
		exit 1
	fi
}


# If a host responds to pings, it is considered up.
function checkconnection() {
	# Ping every host listed in HOSTS, one after the other, and hand each
	# result over to rememberstate.
	#
	# Globals read:    HOSTS, PING, RETRIES, ALIVE_ANSWER, DEBUG
	# Globals written: HOST, UP, TIME_STAMP, PING_ANSWER, PING_ERRNUM
	#                  (HOST, UP and TIME_STAMP are used by rememberstate
	#                  and by the alert templates)

	# Split the list on any white space, so that repeated spaces do not
	# produce empty host names. read returns non-zero at end of input,
	# the array is filled nevertheless.
	local -a host_list=()
	IFS=$' \t\n' read -r -d '' -a host_list <<< "$HOSTS" || true

	for HOST in "${host_list[@]}"; do
		unset UP

		# ping host (fping options as chosen by the original author; the
		# retry count comes from the RETRIES setting)
		PING_ANSWER="$("$PING" -R -B 2 -r "$RETRIES" -p 50 "$HOST" 2>&1)"
		PING_ERRNUM=$?
		if (( PING_ERRNUM > 2 )); then
			# Exit states above 2 are treated as a failure of fping
			# itself. Report the first line of its output.
			echo "Error: Got error $PING_ERRNUM from fping: ${PING_ANSWER%%$'\n'*}. Disregarding $HOST." >&2
			continue
		fi

		# parse answer
		if grep -q -- "$ALIVE_ANSWER" <<< "$PING_ANSWER"; then
			UP=true
		fi
		TIME_STAMP="$(date +"%a %e.%m.%y %H:%M:%S")"
		if [[ -n "$UP" ]]; then
			if [[ -n "$DEBUG" && "$DEBUG" != "low" ]]; then
				shout "The connection to $HOST is up."
			fi
		elif [[ -n "$DEBUG" ]]; then
			shout "The connection to $HOST is down."
		fi

		# remember a hosts state
		rememberstate
	done
}


# Remember recent state of host, trigger alert.
#
# A state file "$TMPDIR/.canardien-$PINGHOST-$HOST" holding the time stamp
# of the first detected failure exists as long as the host is down.
#
# Globals read: HOST, UP, TIME_STAMP, TMPDIR, PINGHOST, DEBUG
function rememberstate() {
	local state_file="$TMPDIR/.canardien-$PINGHOST-$HOST"

	if [[ -z "$UP" ]]; then
		if [[ ! -e "$state_file" ]]; then
			# Host is detected as down for the first time:
			# set time stamp and send alert
			printf '%s' "$TIME_STAMP" > "$state_file"
			sendalert
		elif [[ -n "$DEBUG" ]]; then
			shout "Down since $(< "$state_file")."
		fi
		return
	fi

	# Host is up
	if [[ -e "$state_file" ]]; then
		if [[ -n "$DEBUG" ]]; then
			shout "$HOST is up again."
		fi
		# Send the recovery alert. The templates are passed as arguments
		# instead of overwriting ALERT_SUBJECT and ALERT_TEXT, which are
		# still needed unchanged for the remaining hosts. The state file
		# is read while the text is expanded, so delete it afterwards.
		sendalert \
			'"The host $HOST answers again!!"' \
			'"[$(date +"%a %e.%m.%y %H:%M:%S")]\n\n\n$HOST is up again.\n--------------------------------------\n(downtime began $(cat "$TMPDIR/.canardien-$PINGHOST-$HOST"))\n\n\n Kind regards, $PINGHOST."'
		# delete tmp file
		rm -f -- "$state_file"
	elif [[ -n "$DEBUG" && "$DEBUG" != "low" ]]; then
		# still running
		shout "$HOST is up."
	fi
}


# Send an email alert.
#
# Arguments: $1 - subject template (optional, default: $ALERT_SUBJECT)
#            $2 - text template    (optional, default: $ALERT_TEXT)
#            A template is a "double quoted" string kept inside 'single
#            quotes'; its variables and command substitutions are expanded
#            here, at the time the message is sent.
# Globals read: ALERT_TO, ALERT_SUBJECT, ALERT_TEXT, DEBUG, and whatever
#            the templates refer to
# Returns:   0 when the mail was handed over or alerts are disabled,
#            1 when the mail command failed
function sendalert() {
	local subject_template="${1:-$ALERT_SUBJECT}"
	local text_template="${2:-$ALERT_TEXT}"
	local subject body

	# no recipient, no mail
	if [[ -z "$ALERT_TO" ]]; then
		return 0
	fi

	# NOTE: eval runs whatever the templates contain. They must only ever
	# come from this script's settings, never from untrusted input. The
	# eval arguments are quoted to keep the templates from being word
	# split or glob expanded before they are evaluated.
	eval "subject=\$${subject_template}"
	eval "body=\$${text_template}"

	# send mail; %b interprets the \n and \t of the text
	if printf '%b' "$body" | mail -s "$subject" "$ALERT_TO"; then
		if [[ -n "$DEBUG" ]]; then
			shout "Alert sent to $ALERT_TO."
		fi
	else
		echo "Error: Could not send alert to $ALERT_TO." >&2
		return 1
	fi
}


# To avoid any output in case of a silent operation,
# shout instead of echo.
#
# Arguments: $1 - text to print; backslash escapes are interpreted
function shout() {
	if [[ -z "${SILENT:-}" ]]; then
		printf '%b\n' "$1"
	fi
}

# -----------main-----------

# hand over every command line argument as a host of its own
init "$@"
checkconnection

exit 0