File:  [Local Repository] / badi / public_scripts / canardien / canardien
Revision 1.6: download - view: text, annotated - select for diffs - revision graph
Sun Aug 6 20:06:51 2023 UTC (8 months, 4 weeks ago) by adi
Branches: MAIN
CVS tags: HEAD
Parallel execution when guarding multiple hosts.

#!/bin/bash

# canardien 0.0.6
# (c) 2005-2021 under GPL by Adrian Zaugg


# canardien [<host> [<host> ...]]

# canardien pings a machine, originally ente.limmat.ch (hence its name),
# to determine whether she has gone diving.


## Settings
#

# Hosts to ping. Use a space-separated list for multiple targets. All
# hosts of the list are checked and their results reported individually.
# Hosts given on the command line override this setting.
HOSTS=""

# eMail alerts
# Send alert email messages to the following address(es). Leave empty
# for no alert. For multiple destinations use a comma-separated list.
ALERT_TO="root"

# Subject of alert email. Keep the embedded double quotes: sendalert
# expands this template with eval at the time the mail is sent.
ALERT_SUBJECT='"Attention: no answer from $HOST anymore!"'

# Text of message (Put variables in 'single quotes' to protect them. They
# should get expanded at the time the message is sent!). As with the
# subject, the embedded double quotes are required by sendalert.
ALERT_TEXT='"\n[$TIME_STAMP]\n\nALERT!!\n\n\t$HOST is down!\n\nYou should probably do something, please.\n\nKind regards, $PINGHOST."'

# Path to fping. Leave empty to have init look it up in PATH.
PING=""

# Answer of fping for reachable hosts
ALIVE_ANSWER="is alive"

# Max number of packets to send before giving up (handed to fping as its
# retry count). Time increases exponentially, use a number < 7.
RETRIES=5

# Directory holding the state files that remember which hosts are down
TMPDIR="/tmp"

# This host's name
PINGHOST="$(uname -n)"

# Set to an empty string to avoid debug output, to "low" for a little
# output, and to anything else for verbose output
DEBUG=verbose

# For silent (non-error) operation set to anything,
# comment out to enable text output
#SILENT=shshsh


# -----------functions-----------

# Initialize: locate fping and determine the list of hosts to check.
#
# Globals:
#   PING   (read/written) path to fping; looked up in PATH when empty
#   HOSTS  (read/written) host list; on return the hosts are separated by
#          exactly one space, without leading or trailing whitespace
# Arguments:
#   $@ - optional host names; when given they replace the HOSTS setting
# Exits with status 1 when fping is missing or not executable, or when no
# host is left to ping.
function init() {
	local -a host_list

	# check fping existence
	if [ -z "$PING" ]; then
		# "command -v" is a shell builtin, whereas the external "which"
		# is not guaranteed to be installed.
		PING="$(command -v fping)"
	fi
	if [ -z "$PING" ]; then
		echo "Error: fping external program not found or not set. Exitting." >&2
		exit 1
	fi
	if [ ! -x "$PING" ]; then
		echo "Error: Can't execute the program set to use as fping. Please enter the correct path to \"fping\"." >&2
		exit 1
	fi

	# get hostset
	if [ -n "$*" ]; then
		HOSTS="$*"
	fi

	# Collapse any run of whitespace (spaces, tabs, newlines) into a single
	# space and trim both ends. read returns non-zero here because it hits
	# end of input before the (empty) delimiter; the array is filled anyway.
	read -r -d '' -a host_list <<< "$HOSTS"
	HOSTS="${host_list[*]}"

	if [ -z "$HOSTS" ]; then
		echo "Error: No host to ping." >&2
		exit 1
	fi
}


# Ping $HOST; if it responds to pings, it is considered up.
#
# Globals read:    PING, RETRIES, HOST, ALIVE_ANSWER, DEBUG
# Globals written: UP (set to "true" when the host answered, unset
#                  otherwise), TIME_STAMP, PING_ANSWER, PING_ERRNUM
# Returns: 1 when fping itself failed (exit status above 2); the remembered
#          state of the host is not touched and no alert is sent in that
#          case. Otherwise the status of rememberstate.
function checkconnection() {

	unset UP

	# ping host
	PING_ANSWER="$("$PING" -R -B 2 -r "$RETRIES" -p 50 "$HOST" 2>&1)"
	PING_ERRNUM=$?
	if [ "$PING_ERRNUM" -gt 2 ]; then
		# Per fping(8), statuses above 2 mean fping could not do its job
		# (bad arguments, system call failure), so nothing is known about
		# the host. Report the first line of fping's output and bail out
		# instead of treating the host as down.
		echo "Error: Got error $PING_ERRNUM from fping ${PING_ANSWER%%$'\n'*}. Disregarding $HOST." >&2
		return 1
	fi

	# parse answer
	if grep -q -- "$ALIVE_ANSWER" <<< "$PING_ANSWER"; then
		UP=true
	fi
	TIME_STAMP="$(date +"%a %e.%m.%y %H:%M:%S")"
	if [ -n "$UP" ]; then
		# "up" is only worth mentioning in verbose mode
		if [ ! "$DEBUG" = "low" ] && [ -n "$DEBUG" ]; then
			shout "The connection to $HOST is up."
		fi
	else
		if [ -n "$DEBUG" ]; then
			shout "The connection to $HOST is down."
		fi
	fi

	# remember a hosts state
	rememberstate
}


# Remember the recent state of the host and trigger an alert when it changes.
#
# While a host is considered down, the state file
# "$TMPDIR/.canardien-$PINGHOST-$HOST" exists and holds the time stamp of the
# first failed check. An alert is sent when the file gets created (host went
# down) and again right before it gets removed (host came back).
#
# Globals read: UP, TMPDIR, PINGHOST, HOST, TIME_STAMP, DEBUG
function rememberstate() {
	local state_file="$TMPDIR/.canardien-$PINGHOST-$HOST"

	if [ -z "$UP" ]; then
		# Host is down
		if [ ! -e "$state_file" ]; then
			# first detection: set time stamp
			printf '%s' "$TIME_STAMP" > "$state_file"
			# send alert
			sendalert
		elif [ -n "$DEBUG" ]; then
			shout "Down since $(cat "$state_file")."
		fi
	else
		# Host is up
		if [ -e "$state_file" ]; then
			if [ -n "$DEBUG" ]; then
				shout "$HOST is up again."
			fi
			# Send the "up again" alert. The templates are overridden
			# locally only: sendalert sees them through dynamic scoping,
			# while the configured "down" templates stay intact.
			local ALERT_SUBJECT='"The host $HOST answers again!"'
			local ALERT_TEXT='"[$(date +"%a %e.%m.%y %H:%M:%S")]\n\n\n$HOST is up again.\n--------------------------------------\n(downtime began $(cat "$TMPDIR/.canardien-$PINGHOST-$HOST"))\n\n\n		Kind regards, $PINGHOST."'
			sendalert
			# delete the state file; the text above reads it, so this has
			# to happen after the alert was sent
			rm -f -- "$state_file"
		else
			# still running
			if [ ! "$DEBUG" = "low" ] && [ -n "$DEBUG" ]; then
				shout "$HOST is up."
			fi
		fi
	fi
}


# Send an email alert.
#
# Expands the ALERT_SUBJECT and ALERT_TEXT templates and mails the result to
# ALERT_TO. Does nothing when ALERT_TO is empty.
#
# Globals read: ALERT_TO, ALERT_SUBJECT, ALERT_TEXT, DEBUG (plus whatever
#               variables the templates reference)
# Returns: 0 on success or when alerts are disabled, 1 if mail failed.
function sendalert() {
	local alert_subject MSG

	if [ -z "$ALERT_TO" ]; then
		return 0
	fi

	# The templates carry their own double quotes, so evaluating
	# 'name=<template>' expands the variables and command substitutions
	# inside them now. The argument to eval is quoted as a whole: otherwise
	# the template would be word-split and glob-expanded first, collapsing
	# tabs/multiple spaces and turning a "*" into file names.
	# NOTE: eval is acceptable only because the templates come from this
	# script's own settings; never feed it untrusted text.
	eval "alert_subject=$ALERT_SUBJECT"
	eval "MSG=$ALERT_TEXT"

	# %b interprets the backslash escapes (\n, \t) used in the templates
	if printf '%b' "$MSG" | mail -s "$alert_subject" "$ALERT_TO"; then
		if [ -n "$DEBUG" ]; then
			shout "Alert sent to $ALERT_TO."
		fi
	else
		echo "Error: Could not send alert to $ALERT_TO." >&2
		return 1
	fi
}


# Print a message unless silent operation was requested.
# Use shout instead of echo for every non-error message.
#
# Globals read: SILENT (any non-empty value suppresses the output)
# Arguments:    $1 - text to print; backslash escapes are interpreted
function shout() {
	[ -n "$SILENT" ] || echo -e "$1"
}


# -----------main-----------

init "$@"

# Split the host list that init prepared into single hosts.
read -r -a host_list <<< "$HOSTS"

if [ "${#host_list[@]}" -gt 1 ]; then
	# Guard multiple hosts in parallel: one background subshell per host.
	# Running the function directly (instead of re-executing "$0") does not
	# depend on how the script was invoked and avoids repeating init.
	for HOST in "${host_list[@]}"; do
		checkconnection &
	done
	# Do not exit before every check has finished.
	wait
else
	# process a single host
	HOST="${host_list[0]}"
	checkconnection
fi

exit 0

# todo: Switches ( -q, --debug, ...)
#	grace period: do not send a mail if down time shorter than X minutes
#	repeat down info after x hours
#	test port instead of ping or in addition


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>