#! /usr/bin/env bash
#
#
# Copyright (c) 2014 Spectra Logic Corporation
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions, and the following disclaimer,
#    without modification.
# 2. Redistributions in binary form must reproduce at minimum a disclaimer
#    substantially similar to the "NO WARRANTY" disclaimer below
#    ("Disclaimer") and any redistribution must be conditioned upon
#    including a substantially similar Disclaimer requirement for further
#    binary redistribution.
# 
# NO WARRANTY
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGES.
# 
#
# Script name without its directory; used as the syslog tag and in messages.
PROG=${0##*/}

# rc helper library that is sourced once the usage text has been built.
RC_SUBR=/etc/rc.subr

DEFAULT_PREFIX=/usr/local
PREFIX=${PREFIX:-$DEFAULT_PREFIX}
# No trailing ':' -- an empty PATH element makes the shell search the
# current working directory for commands.
PATH=${PATH}:${PREFIX}/sbin:${PREFIX}/bin

# Log destination ("sys" = syslog through logger, anything else = stderr)
# and the extra arguments handed to logger.
LOG_TYPE=sys
LOG_ARGS="-t $PROG"

VERBOSE=no

# LW_CHILD holds the pid of the background command currently being waited
# for, so that error() can kill it; it is unset whenever there is none.
unset LW_CHILD
LW_WAIT_MAX=5
LW_COMPONENTS=(lwreg dcerpc lsass lwio netlogon npfs pvfs rdr srv srvsvc)
LW_ENABLE_FILES=(/etc/rc.conf /etc/spectra/rc.conf.d/rc.likewise)
LW_PIDFILE=/var/db/likewise/.lwsmd-lock
LW_NAME=/usr/local/sbin/lwsmd

# Built after the defaults above so that the text can show their values.
USAGE="
${PROG} validate that the likewise services are operational.

${PROG} usage:
${PROG} [-l <sys|err>] -p <pid_file> -P <proc_name> [-v] [-w <wait max>] 

    where:
    
    -l <sys|err>:       Specifies where to log status messages; 
                        use \"sys\"  for syslog and \"err\" for stderr.
                        Default: \"$LOG_TYPE\"

    -p <pid_file>:      Specifies the lwsmd pid/lock file path name.
                        Default: $LW_PIDFILE

    -P <proc_name>:     Specifies the lwsmd ps name.
                        Default: $LW_NAME

    -v:                 Increase verbosity.
                        Default: be rather quiet

    -w <wait max>:      Specifies the maximum wait time for likewise service
                        health check.
                        Default: $LW_WAIT_MAX

    Environment:
    PREFIX:             Specifies installation PREFIX.
                        Default: $DEFAULT_PREFIX

"

# Load the rc helper library. check_pidfile, which the main flow calls, is
# not defined in this file and is presumably provided by this library.
. $RC_SUBR

# log:
#	Emit one status message built from all arguments.
#
#	- LOG_TYPE "sys": hand the message to logger together with $LOG_ARGS.
#	- anything else:  write "$PROG: message" to stderr.
#
#	Always returns 0 so that a logging problem never fails a health check.
#
log()
{
	if [ "$LOG_TYPE" == "sys" ]
	then
		# LOG_ARGS is left unquoted on purpose: it holds several words.
		logger $LOG_ARGS "$*"
	else
		# The message is passed as data, never as the format string, so
		# '%' and '\' in command output are printed literally.
		printf '%s: %s\n' "$PROG" "$*" >&2
	fi
	return 0
}

# verbose_log:
#	Forward the message to log unless VERBOSE is "no".
#	Always returns 0.
#
verbose_log()
{
	[[ "$VERBOSE" == "no" ]] || log "$*"
	return 0
}

# error:
#	Log a message and terminate the script.
#
#	$1   - exit status for the script
#	$2.. - message text
#
#	When LW_CHILD is set, that process is sent the default kill signal
#	before the script exits.
#
error()
{
	local exit_code=$1
	shift

	log "$*"

	if [[ -n "$LW_CHILD" ]]
	then
		kill "$LW_CHILD"
	fi
	exit $exit_code
}

# _lw_kill_child:
#	Send the default kill signal to pid $1 and follow up with SIGKILL if
#	the pid can still be signalled right afterwards. The output of every
#	kill, including the probe, is discarded. Always returns 0.
#
_lw_kill_child()
{
	local pid="$1"

	kill "$pid" > /dev/null 2>&1
	if kill -0 "$pid" > /dev/null 2>&1
	then
		kill -9 "$pid" > /dev/null 2>&1
	fi
	return 0
}

# wait_for_child:
#	Give a child process $LW_WAIT_MAX seconds to complete;
#	kill it if it is taking too long.
#
#	$1 - pid of the child; its status is collected with wait, so it has
#	     to be a child of this shell
#	$2 - command name, used in log messages only
#
#	Returns the child's exit status when it finishes in time, 1 when it
#	was killed after the timeout, and 0 on the fail-safe path that is
#	taken when the loop runs out of iterations first.
#
wait_for_child()
{
	local child="$1"
	local cmd="$2"
	local timeout=$((LW_WAIT_MAX))
	local cntmax=$((timeout + 2))
	local cnt=0
	local t0
	local elapsed

	t0=$(date +%s)

	verbose_log "waiting for $cmd $child to complete in $timeout seconds"
	while ((cnt <= cntmax))
	do
		cnt=$((cnt + 1))

		# Once the pid can no longer be signalled the child is gone and
		# wait only has to hand back its exit status.
		if ! kill -0 "$child" > /dev/null 2>&1
		then
			wait "$child"
			return $?
		fi

		# A negative elapsed time means the clock stepped backwards;
		# the timeout test is skipped then and the iteration limit
		# bounds the wait instead.
		elapsed=$(($(date +%s) - t0))
		if ((elapsed >= 0 && elapsed >= timeout))
		then
			log "$cmd $child taking too long, runtime $elapsed"
			_lw_kill_child "$child"
			return 1
		fi
		sleep 1
	done

	# Fail safe path in case time goes backwards
	#
	verbose_log "killing $cmd $child"
	_lw_kill_child "$child"

	# Shouldn't happen therefore don't declare an error.
	#
	return 0
}
		
# lw_enabled:
#	Check that the two likewise services are enabled.
#
#	Every non-empty file listed in LW_ENABLE_FILES is sourced in order and
#	the resulting lwsmd_enable and likewise_enable values are compared,
#	case-insensitively, against "YES".
#
#	- If enabled then return 0 to continue health checks.
#	- If not enabled then exit 0 as this is not an error.
#
lw_enabled()
{
	local service_file
	local lwsmd_enable
	local likewise_enable
	local enable_cnt=0

	# Make sure only values assigned by the sourced files are seen.
	unset lwsmd_enable
	unset likewise_enable

	# Quoted expansions keep a path that contains whitespace in one piece.
	for service_file in "${LW_ENABLE_FILES[@]}"
	do
		if [ -s "$service_file" ]
		then
			. "$service_file"
		fi
	done

	if [ "${lwsmd_enable^^}" == "YES" ]
	then
		enable_cnt=$((enable_cnt + 1))
	fi

	if [ "${likewise_enable^^}" == "YES" ]
	then
		enable_cnt=$((enable_cnt + 1))
	fi

	if ((enable_cnt != 2))
	then
		verbose_log "$enable_cnt of 2 likewise components enabled"
		verbose_log "lwsmd_enable = $lwsmd_enable"
		verbose_log "likewise_enable = $likewise_enable"

		exit 0
	fi

	verbose_log "all likewise services are enabled"
	return 0
}

# lwsm_status:
#	Use the lwsm status command to check each likewise component listed
#	in LW_COMPONENTS. A component counts as running when the output of
#	"lwsm status <component>" contains "running".
#
#	- If all components are running then return 0 to continue health checks.
#	- If any component is not running then log the "lwsm list" output and
#	  exit non-zero through error.
#
lwsm_status()
{
	local service
	local down_cnt=0
	local service_cnt=${#LW_COMPONENTS[@]}

	# Quoted expansions: one array element is exactly one component, which
	# keeps the number of queries in step with service_cnt.
	for service in "${LW_COMPONENTS[@]}"
	do
		if ! lwsm status "$service" 2>&1 | grep -q running
		then
			log "$service not running"
			down_cnt=$((down_cnt + 1))
		fi
	done

	if ((down_cnt != 0))
	then
		log "$(lwsm list)"
		error 1 "$down_cnt of $service_cnt likewise services is not running"
	fi

	verbose_log "likewise components are running"
	return 0
}

# lw_lsa_status:
#	Use "lw-lsa get-status" to validate the status of the likewise
#	security providers.
#
#	- If the command succeeds then return 0 to continue health checks.
#	- If the command fails or has to be killed then exit non-zero.
#
lw_lsa_status()
{
	local status

	# lw-lsa can hang if it can not connect to lsass, so it runs in the
	# background where wait_for_child can bound its runtime. LW_CHILD
	# names the pid only while the command is being waited for.
	#
	lw-lsa get-status > /dev/null 2>&1 &
	LW_CHILD=$!
	wait_for_child $LW_CHILD lw-lsa
	status=$?
	unset LW_CHILD

	if ((status != 0))
	then
		error 1 "lw-lsa get-status failed with exit value $status"
	fi

	verbose_log "lsass appears to be operational"
	return 0
}

# lwnet_status:
#	Use "lwnet share" to validate the status of the likewise shares.
#
#	- If the command succeeds then return 0 to continue health checks.
#	- If the command fails or has to be killed then exit non-zero.
#
lwnet_status()
{
	local status

	# lwnet can hang if it can not connect to likewise, so it runs in the
	# background where wait_for_child can bound its runtime. LW_CHILD
	# names the pid only while the command is being waited for.
	#
	lwnet share > /dev/null 2>&1 &
	LW_CHILD=$!
	wait_for_child $LW_CHILD lwnet
	status=$?
	unset LW_CHILD

	if ((status != 0))
	then
		error 1 "lwnet share failed with exit value $status"
	fi

	verbose_log "lwnet appears to be operational"
	return 0
}


# parse_args:
#	Apply the command line options to the global settings.
#
#	-l <type>   "err" (any case) switches LOG_TYPE to err; other values
#	            leave LOG_TYPE untouched
#	-p <file>   sets LW_PIDFILE
#	-P <name>   sets LW_NAME
#	-v          sets VERBOSE to yes
#	-w <secs>   sets LW_WAIT_MAX when the value is a decimal number
#	            greater than 1; anything else is ignored
#
#	Any other option logs the usage text and exits with status 22.
#
parse_args()
{
	local option
	local OPTIND=1
	local OPTARG

	while getopts "l:p:P:vw:" option
	do
		case "$option"
		in
			l)
			if [ "${OPTARG^^}" == "ERR" ]
			then
				LOG_TYPE=err
			fi
			;;

			p)
			LW_PIDFILE=$OPTARG
			;;

			P)
			LW_NAME=$OPTARG
			;;

			v)
			VERBOSE=yes
			;;

			w)
			# Check the text first: arithmetic evaluation of an
			# arbitrary string would expand variables and command
			# substitutions inside it. 10# keeps a leading zero
			# from being read as octal.
			if [[ "$OPTARG" =~ ^[0-9]+$ ]] && ((10#$OPTARG > 1))
			then
				LW_WAIT_MAX=$((10#$OPTARG))
			fi
			;;

			*)
			error 22 "$USAGE"
			;;
		esac
	done
	return 0
}

parse_args "$@"

verbose_log \
     "LW_PIDFILE: ${LW_PIDFILE}, LW_NAME: ${LW_NAME}, LW_WAIT_MAX: $LW_WAIT_MAX"

# Health can only be checked while lwsmd is running; when it is not,
# restarting likewise is monit's responsibility and there is nothing to
# report from here.
#
pid=$(check_pidfile "$LW_PIDFILE" "$LW_NAME")
[[ -n "$pid" ]] || {
	verbose_log "lwsmd not running"
	exit 0
}

# Only an active or active standalone system gets a health check; every
# other state reported by hpdctl ends the script successfully.
#
hpd_state=$(hpdctl state)
verbose_log "hpd_state: $hpd_state"
case "$hpd_state"
in
	HB_ACTIVE | HB_ACTIVE_STANDALONE)
	;;

	*)
	verbose_log "$hpd_state not a valid state for health check"
	exit 0
	;;
esac

# From here on a signal ends the script through error, which also kills a
# background command that is still being waited for.
#
trap 'error 9 caught signal' SIGINT SIGQUIT SIGTERM SIGHUP

# Run the checks in order; a failing check exits the script itself.
#
for check in lw_enabled lwsm_status lw_lsa_status lwnet_status
do
	"$check"
done

verbose_log "OK"
exit 0
