#!/bin/sh
#
# $Id: $
#
# This script can be used to quickly deploy HAST on local host to run
# tests.  It creates two RAM disks and runs two hastd daemons.
#

#
# Config
#
BASE=10
PORTBASE=777
HASTCONF=/tmp/hast.conf
HOSTNAME=$(hostname -s)
RESOURCE=test
MOUNT=/mnt
DISKTYPE=RAM	# RAM or FILE
DISKSIZE=512M
DISKFILE="./%{node}.img"
HASTOPT=-d

CONFIG="
#sync_checksum sha256
#checksum sha256
#compression lzf
#replication async
timeout 20

on ${HOSTNAME} {
    control /var/run/hastctl.${BASE}%{node}
    listen 0.0.0.0:${PORTBASE}%{node}
}

resource ${RESOURCE} {
    on ${HOSTNAME} {
        name ${RESOURCE}%{node}
        local /dev/md${BASE}%{node}
        remote tcp4://${HOSTNAME}:${PORTBASE}%{rnode}
    }
}
"

#
# Functions
#

deploy() {
    local img

    set -e

    echo "Creating devices and configs."
    for i in 1 2; do
        if ! test -c /dev/md${BASE}${i}; then
            case "${DISKTYPE}" in
            FILE)
                img=`echo "${DISKFILE}" | sed -e "s/%{node}/${i}/g;"`
                test -f "${img}" || truncate -s "${DISKSIZE}" "${img}"
                mdconfig -a -t vnode -f "${img}" -u "${BASE}${i}"
                ;;
            RAM)
                mdconfig -a -t swap -s "${DISKSIZE}" -u "${BASE}${i}"
                ;;
            *)
                echo "Wrong DISKTYPE: ${DISKTYPE}." >&2
                exit 1;
            esac
        fi
        printf "%s" "$CONFIG" | sed -e "s/%{node}/${i}/g; s/%{rnode}/$((${i} % 2 + 1))/g" \
            > "$HASTCONF.${BASE}${i}"
    done

    start $@

    make_fs
    mount_fs
}
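
# For reference, deploy() above expands the CONFIG template into per-node
# files ${HASTCONF}.101 and ${HASTCONF}.102, replacing %{node} with the node
# number and %{rnode} with the peer's number.  With the defaults above,
# node 1's file (/tmp/hast.conf.101) should look roughly like this
# (illustrative sketch; the commented-out options from CONFIG are omitted):
#
#     timeout 20
#
#     on <hostname> {
#         control /var/run/hastctl.101
#         listen 0.0.0.0:7771
#     }
#
#     resource test {
#         on <hostname> {
#             name test1
#             local /dev/md101
#             remote tcp4://<hostname>:7772
#         }
#     }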

start () {
    local device cfg role

    echo "Starting hastd."
    for i in 1 2; do
        test -n "$1" && HASTOPT="$@"
        device="/dev/hast/${RESOURCE}${i}"
        cfg="$HASTCONF.${BASE}${i}"
        [ $i = 1 ] && role=primary || role=secondary
        hastd $HASTOPT -c "${cfg}" -P "/var/run/hastd.vm${BASE}${i}.pid"
        hastctl dump -c "${cfg}" "${RESOURCE}" >/dev/null 2>&1 || hastctl create -c "${cfg}" "${RESOURCE}"
        hastctl role -c "${cfg}" "${role}" "${RESOURCE}"
        if [ "${role}" = primary ]; then
            for i in `jot 50`; do
                [ -c "${device}" ] && break
                sleep 0.1
            done
            if [ ! -c "${device}" ]; then
                echo "Device ${device} didn't appear." >&2
                exit 1
            fi
        fi
    done
}

stop() {
    umount_fs
    for i in 1 2; do
        hastctl role -c "$HASTCONF.${BASE}${i}" init "${RESOURCE}"
        pid=`cat "/var/run/hastd.vm${BASE}${i}.pid"`
        test -n "$pid" && kill "$pid"
    done
}

reload() {
    for i in 1 2; do
        pid=`cat "/var/run/hastd.vm${BASE}${i}.pid"`
        test -n "$pid" && kill -HUP "$pid"
    done
}

make_fs() {
    local device

    for i in 1 2; do
        device="/dev/hast/${RESOURCE}${i}"
        if [ -c "${device}" ]; then
            newfs "${device}"
        fi
    done
}

mount_fs() {
    local device

    mkdir -p "${MOUNT}"
    for i in 1 2; do
        device="/dev/hast/${RESOURCE}${i}"
        if [ -c "${device}" ]; then
            mount "${device}" "${MOUNT}"
            break
        fi
    done
}

umount_fs() {
    mount | grep -q "^/dev/hast/${RESOURCE}. on ${MOUNT} " && umount -f "${MOUNT}"
}

status() {
    ps auxww | grep '[h]astd' && echo
    for i in 1 2; do
        echo -n "NODE ${i} "
        hastctl status -c "$HASTCONF.${BASE}${i}" "${RESOURCE}"
        echo
    done
    mount | grep "^/dev/hast/${RESOURCE}. on ${MOUNT} " && echo
}

ctl() {
    local node cmd

    node=$1; shift
    cmd=$1; shift
    hastctl $cmd -c "$HASTCONF.${BASE}${node}" $@ "${RESOURCE}"
}

destroy() {
    stop
    for i in 1 2; do
        mdconfig -d -u "${BASE}${i}"
    done
}

# With the scenario below it is sometimes possible to get the following
# error:
#
# Mar 14 23:30:31 kopusha hastd[11134]: [test] (primary) Synchronization started. 512MB to go.
# Mar 14 23:30:36 kopusha hastd[11134]: [test] (primary) Unable to receive reply header: Resource temporarily unavailable.
# Mar 14 23:30:36 kopusha hastd[11134]: [test] (primary) Disconnected from tcp4://kopusha:7772.
# Mar 14 23:30:36 kopusha hastd[11134]: [test] (primary) Unable to write synchronization data: Invalid argument.
# Mar 14 23:30:36 kopusha hastd[11134]: [test] (primary) Synchronization interrupted after 4s. 128KB synchronized so far.
# Mar 14 23:30:36 kopusha hastd[11246]: [test] (secondary) Unable to receive request header: Socket is not connected.
# Mar 14 23:30:41 kopusha hastd[11136]: [test] (secondary) Worker process exited ungracefully (pid=11246, exitcode=75).
#
test_sync() {
    # deploy the system and fill it with some data
    destroy && sleep 5
    set -e
    deploy
    cp -R /usr/src/sys "${MOUNT}/"
    sleep 2

    # recreate the secondary to force synchronization
    ctl 2 role init
    ctl 2 create
    ctl 2 role secondary
    sleep 2

    status
}

# This is to test the scenario described by trasz@ on yomoli.com:
#
# Situation is like this: two nodes, A (primary) and B (secondary).  We
# make them switch the roles - A becomes secondary, B becomes primary,
# but A still has the latest data, so reads on B go to A.
# Synchronisation begins, B pulls data from A.  Then, the link between A
# and B gets dropped.  What happens, from the point of view of userland
# on B?
#
test_trasz() {
    # Setup.
    echo "Deploying and filling with some data..."
    destroy 2> /dev/null && sleep 5	# cleanup from previous tests
    set -e
    deploy
    umount_fs	# we don't need the FS for this test
    dd if=/dev/random of="/dev/hast/${RESOURCE}1" bs=1024 count=204800
    sleep 2
    echo "Done."

    # Stop node 2 (secondary) and write some data to hast to make
    # node 2 outdated.
    echo "Make node 2 outdated..."
    ctl 2 role init
    dd if=/dev/random of="/dev/hast/${RESOURCE}1" bs=1024 count=204800
    echo "Done."

    # Make node 2 primary.
    echo "Failing over..."
    ctl 1 role secondary
    ctl 2 role primary

    # Wait until node 2 connects to the secondary.
    while ctl 2 status | grep -q 'dirty: 0 '; do
        echo "Waiting..."
        sleep 0.1
    done
    echo "Done."

    # At this moment we should have a lot of dirty data, and syncsrc is
    # pointing to the secondary (node 1).

    # Emulate connection drop.
    echo "Switching node 1 to init..."
    ctl 1 role init
    echo "Done."

    # Try to read something from the hast storage.
    echo "Reading..."
    dd if="/dev/hast/${RESOURCE}2" of=/dev/null bs=1024 count=204800 || :
    # The read should fail until the secondary is up again:
    #     dd: /dev/hast/test2: Socket is not connected
    #     0+0 records in
    #     0+0 records out
    #     0 bytes transferred in 0.000377 secs (0 bytes/sec)
    echo "Done."
}

# Scenario:
#
# 1. Deploy HAST with node 1 as primary and sync.
# 2. Switch node 2 to init and recreate it.
# 3. Switch node 2 to secondary and wait until the 1->2 sync has started.
# 4. Change the nodes' roles so that node 2 is primary now
#    (note: the 1->2 sync was not complete).
#
# Expected result: as the updated data are on node 1, a 1->2 sync should be
# started.
# Actual result: a 2->1 sync is started.
#
test_remote_sync() {
    # deploy the system and fill it with some data
    echo "Deploying and filling with some data..."
    destroy 2> /dev/null && sleep 5	# cleanup from previous tests
    set -e
    deploy
    umount_fs
    dd if=/dev/random of="/dev/hast/${RESOURCE}1" bs=1024 count=204800
    sleep 2
    echo "Done."

    # recreate node 2 to have it outdated
    ctl 2 role init
    ctl 2 create
    ctl 2 role secondary
    # If we write something to hast at this moment, the primary's local
    # count will be bumped; if we don't, it won't.
    # dd if=/dev/random of="/dev/hast/${RESOURCE}1" bs=1024 count=10

    # wait until node 1 connects to the secondary
    while ctl 1 status | grep -q 'dirty: 0 '; do
        echo "Waiting..."
        sleep 0.1
    done
    echo "Done."

    # make node 2 primary
    echo "Failing over..."
    ctl 1 role secondary
    ctl 2 role primary

    # wait until node 2 connects to the secondary
    while ctl 2 status | grep -q 'dirty: 0 '; do
        echo "Waiting..."
        sleep 0.1
    done
    echo "Done."
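
    # At this point node 2 (the new primary) has connected to node 1 and a
    # synchronization is running; per the scenario comment above, the
    # expected direction is 1->2 (node 1 holds the newer data), but the
    # observed behaviour is a 2->1 sync.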

    # emulate connection drop
    echo "Switching node 1 to init..."
    ctl 1 role init
    echo "Done."

    # try to read something from the hast storage
    echo "Reading..."
    dd if="/dev/hast/${RESOURCE}2" of=/dev/null bs=1024 count=204800 || :
    echo "Done."
}

# After test_trasz or test_remote_sync, sync the data between the nodes and
# switch back to node 1.
#
# Expected result: it works.
# Actual result: split-brain is detected.
#
split_brain () {
    ctl 1 role secondary

    # Wait until node 2 connects to the secondary.
    while ctl 2 status | grep -q 'dirty: 0 '; do
        echo "Waiting for connection ..."
        sleep 0.1
    done
    while ! ctl 2 status | grep -q 'dirty: 0 '; do
        echo "Waiting for sync to complete..."
        sleep 0.1
    done

    # Change roles.
    echo "Changing roles..."
    ctl 2 role secondary
    ctl 1 role primary
    echo "Done."
}

test_trasz_sb() {
    test_trasz
    split_brain
}

test_remote_sync_sb() {
    test_remote_sync
    split_brain
}

# dd(1) read/write performance depending on bs.
#
test_dd() {
    local bs count

    # deploy the system
    echo "Deploying and filling with some data..."
    destroy 2> /dev/null && sleep 5	# cleanup from previous tests
    set -e
    deploy
    umount_fs	# we don't need the FS for this test

    echo "Read test."
    (
        echo 512 131072
        echo 1024 65536
        echo 2048 32768
        echo 4096 16384
        echo 8192 8192
        echo 16384 4096
        echo 32768 2048
        echo 65536 1024
        echo 131072 512
    ) | while read bs count; do
        dd if="/dev/hast/${RESOURCE}1" of=/dev/null bs=${bs} count=${count}
    done

    echo "Write test."
    (
        echo 512 4096
        echo 1024 2048
        echo 2048 1024
        echo 4096 512
    ) | while read bs count; do
        dd if=/dev/random of="/dev/hast/${RESOURCE}1" bs=${bs} count=${count}
    done
}

#
# Test running rsync.
#
test_rsync() {
    echo "Deploying ..."
    destroy 2> /dev/null && sleep 5	# cleanup from previous tests
    set -e
    deploy

    echo "Running rsync on empty FS..."
    rsync -a /usr/src/sys "${MOUNT}/"

    echo "Removing some files"
    find "${MOUNT}/" -type f -name '*.c' | xargs rm -f

    echo "Running rsync to hast and tmp..."
    rsync -a /usr/src/sys "/tmp/" &
    rsync -a /usr/src/sys "${MOUNT}/"

#    echo "Removing all files"
#    rm -Rf "${MOUNT}/*"
#    rm -Rf "/tmp/sys"

    echo "Running rsync from tmp to hast and from /usr to tmp ..."
    rsync -a /usr/src/sys "/tmp/" &
    rsync -a /tmp/sys "${MOUNT}/"
}

usage() {
    echo "usage: $0 command [args]" >&2
    echo "" >&2
    echo "commands:" >&2
    echo "" >&2
    echo " deploy [hastd options] - create devices, start HAST, make and mount FS" >&2
    echo " start [hastd options]  - start HAST (part of deploy)" >&2
    echo " makefs                 - make FS on HAST (part of deploy)" >&2
    echo " mount                  - mount HAST device (part of deploy)" >&2
    echo " status                 - show current status" >&2
    echo " ctl <node> <command>   - run hastctl command for the node" >&2
    echo " umount                 - unmount HAST FS" >&2
    echo " reload                 - reload HAST configuration" >&2
    echo " stop                   - stop HAST" >&2
    echo " destroy                - unmount FS, stop HAST and destroy devices" >&2
    echo " test_*                 - run the given test function" >&2
    echo "" >&2
    exit 1
}

#
# Main
#

case $1 in
deploy)  shift; deploy $@ ;;
start)   shift; start $@ ;;
makefs)  make_fs ;;
mount)   mount_fs ;;
status)  status ;;
ctl)     test -n "$3" || usage
         shift; ctl $@ ;;
umount)  umount_fs ;;
reload)  reload ;;
stop)    stop ;;
destroy) destroy ;;
test_*)  $1 ;;
*)       usage ;;
esac
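
# Typical workflow (illustrative; "./hast.sh" stands for whatever name this
# script is saved under):
#
#     ./hast.sh deploy        # create md devices, start hastd, newfs, mount
#     ./hast.sh status
#     ./hast.sh ctl 2 status  # run a hastctl command against node 2
#     ./hast.sh test_sync     # or any other test_* function
#     ./hast.sh destroy       # unmount, stop hastd, destroy the md devices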