#!/bin/sh
#
# $Id: $
#
# This script can be used to quickly deploy HAST on local host to run
# tests.  It creates two RAM disks and runs two hastd daemons.
#

#
# Config
#
BASE=10
PORTBASE=777
HASTCONF=/tmp/hast.conf
HOSTNAME=$(hostname -s)
RESOURCE=test
MOUNT=/mnt
DISKTYPE=RAM	# RAM or FILE
DISKSIZE=512M
DISKFILE="./%{node}.img"
HASTOPT=-d

CONFIG="
#sync_checksum sha256
#checksum sha256
#compression lzf
#replication async
timeout 20

on ${HOSTNAME} {
    control /var/run/hastctl.${BASE}%{node}
    listen 0.0.0.0:${PORTBASE}%{node}
}

resource ${RESOURCE} {
    on ${HOSTNAME} {
        name ${RESOURCE}%{node}
        local /dev/md${BASE}%{node}
        remote tcp4://${HOSTNAME}:${PORTBASE}%{rnode}
    }
}
"

#
# Functions
#

deploy() {
    local img

    set -e

    echo "Creating devices and configs."
    for i in 1 2; do
        if ! test -c /dev/md${BASE}${i}; then
            case "${DISKTYPE}" in
            FILE)
                img=`echo "${DISKFILE}" | sed -e "s/%{node}/${i}/g;"`
                test -f "${img}" || truncate -s "${DISKSIZE}" "${img}"
                mdconfig -a -t vnode -f "${img}" -u "${BASE}${i}"
                ;;
            RAM)
                mdconfig -a -t swap -s "${DISKSIZE}" -u "${BASE}${i}"
                ;;
            *)
                echo "Wrong DISKTYPE: ${DISKTYPE}." >&2
                exit 1;
            esac
        fi
        printf "%s" "$CONFIG" | sed -e "s/%{node}/${i}/g; s/%{rnode}/$((${i} % 2 + 1))/g" \
            > "$HASTCONF.${BASE}${i}"
    done

    start $@

    make_fs
    mount_fs
}
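
# For reference, deploy() above expands the CONFIG template into per-node
# files ${HASTCONF}.101 and ${HASTCONF}.102, replacing %{node} with the node
# number and %{rnode} with the peer's number.  With the defaults above,
# node 1's file (/tmp/hast.conf.101) should look roughly like this
# (illustrative sketch; the commented-out options from CONFIG are omitted):
#
#     timeout 20
#
#     on <hostname> {
#         control /var/run/hastctl.101
#         listen 0.0.0.0:7771
#     }
#
#     resource test {
#         on <hostname> {
#             name test1
#             local /dev/md101
#             remote tcp4://<hostname>:7772
#         }
#     }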

start () {
    local device cfg role

    echo "Starting hastd."
    for i in 1 2; do
        test -n "$1" && HASTOPT="$@"
        device="/dev/hast/${RESOURCE}${i}"
        cfg="$HASTCONF.${BASE}${i}"
        [ $i = 1 ] && role=primary || role=secondary
        hastd $HASTOPT -c "${cfg}" -P "/var/run/hastd.vm${BASE}${i}.pid"
        hastctl dump -c "${cfg}" "${RESOURCE}" >/dev/null 2>&1 || hastctl create -c "${cfg}" "${RESOURCE}"
        hastctl role -c "${cfg}" "${role}" "${RESOURCE}"
        if [ "${role}" = primary ]; then
            for i in `jot 50`; do
                [ -c "${device}" ] && break
                sleep 0.1
            done
            if [ ! -c "${device}" ]; then
                echo "Device ${device} didn't appear." >&2
                exit 1
            fi
        fi
    done
}

stop() {
    umount_fs
    for i in 1 2; do
        hastctl role -c "$HASTCONF.${BASE}${i}" init "${RESOURCE}"
        pid=`cat "/var/run/hastd.vm${BASE}${i}.pid"`
        test -n "$pid" && kill "$pid"
    done
}

reload() {
    for i in 1 2; do
        pid=`cat "/var/run/hastd.vm${BASE}${i}.pid"`
        test -n "$pid" && kill -HUP "$pid"
    done
}

make_fs() {
    local device

    for i in 1 2; do
        device="/dev/hast/${RESOURCE}${i}"
        if [ -c "${device}" ]; then
            newfs "${device}"
        fi
    done
}

mount_fs() {
    local device

    mkdir -p "${MOUNT}"
    for i in 1 2; do
        device="/dev/hast/${RESOURCE}${i}"
        if [ -c "${device}" ]; then
            mount "${device}" "${MOUNT}"
            break
        fi
    done
}

umount_fs() {
    mount | grep -q "^/dev/hast/${RESOURCE}. on ${MOUNT} " && umount -f "${MOUNT}"
}

status() {
    ps auxww | grep '[h]astd' && echo
    for i in 1 2; do
        echo -n "NODE ${i} "
        hastctl status -c "$HASTCONF.${BASE}${i}" "${RESOURCE}"
        echo
    done
    mount | grep "^/dev/hast/${RESOURCE}. on ${MOUNT} " && echo
}

ctl() {
    local node cmd

    node=$1; shift
    cmd=$1; shift
    hastctl $cmd -c "$HASTCONF.${BASE}${node}" $@ "${RESOURCE}"
}

destroy() {
    stop
    for i in 1 2; do
        mdconfig -d -u "${BASE}${i}"
    done
}

# With the scenario below it is sometimes possible to get the following
# error:
#
# Mar 14 23:30:31 kopusha hastd[11134]: [test] (primary) Synchronization started. 512MB to go.
# Mar 14 23:30:36 kopusha hastd[11134]: [test] (primary) Unable to receive reply header: Resource temporarily unavailable.
# Mar 14 23:30:36 kopusha hastd[11134]: [test] (primary) Disconnected from tcp4://kopusha:7772.
# Mar 14 23:30:36 kopusha hastd[11134]: [test] (primary) Unable to write synchronization data: Invalid argument.
# Mar 14 23:30:36 kopusha hastd[11134]: [test] (primary) Synchronization interrupted after 4s. 128KB synchronized so far.
# Mar 14 23:30:36 kopusha hastd[11246]: [test] (secondary) Unable to receive request header: Socket is not connected.
# Mar 14 23:30:41 kopusha hastd[11136]: [test] (secondary) Worker process exited ungracefully (pid=11246, exitcode=75).
#
test_sync() {
    # deploy the system and fill it with some data
    destroy && sleep 5
    set -e
    deploy
    cp -R /usr/src/sys "${MOUNT}/"
    sleep 2

    # recreate the secondary to force synchronization
    ctl 2 role init
    ctl 2 create
    ctl 2 role secondary
    sleep 2

    status
}

# This is to test the scenario described by trasz@ on yomoli.com:
#
# Situation is like this: two nodes, A (primary) and B (secondary).  We
# make them switch the roles - A becomes secondary, B becomes primary,
# but A still has the latest data, so reads on B go to A.
# Synchronisation begins, B pulls data from A.  Then, the link between A
# and B gets dropped.  What happens, from the point of view of userland
# on B?
#
test_trasz() {
    # Setup.
    echo "Deploying and filling with some data..."
    destroy 2> /dev/null && sleep 5	# cleanup from previous tests
    set -e
    deploy
    umount_fs	# we don't need the FS for this test
    dd if=/dev/random of="/dev/hast/${RESOURCE}1" bs=1024 count=204800
    sleep 2
    echo "Done."

    # Stop node 2 (secondary) and write some data to hast to make
    # node 2 outdated.
    echo "Make node 2 outdated..."
    ctl 2 role init
    dd if=/dev/random of="/dev/hast/${RESOURCE}1" bs=1024 count=204800
    echo "Done."

    # Make node 2 primary.
    echo "Failing over..."
    ctl 1 role secondary
    ctl 2 role primary

    # Wait until node 2 connects to the secondary.
    while ctl 2 status | grep -q 'dirty: 0 '; do
        echo "Waiting..."
        sleep 0.1
    done
    echo "Done."

    # At this moment we should have a lot of dirty data, and syncsrc is
    # pointing to the secondary (node 1).

    # Emulate connection drop.
    echo "Switching node 1 to init..."
    ctl 1 role init
    echo "Done."

    # Try to read something from the hast storage.
    echo "Reading..."
    dd if="/dev/hast/${RESOURCE}2" of=/dev/null bs=1024 count=204800 || :
    # The read should fail until the secondary is up again:
    #     dd: /dev/hast/test2: Socket is not connected
    #     0+0 records in
    #     0+0 records out
    #     0 bytes transferred in 0.000377 secs (0 bytes/sec)
    echo "Done."
}

# Scenario:
#
# 1. Deploy HAST with node 1 as primary and sync.
# 2. Switch node 2 to init and recreate it.
# 3. Switch node 2 to secondary and wait until the 1->2 sync has started.
# 4. Change the nodes' roles so that node 2 is primary now
#    (note: the 1->2 sync was not complete).
#
# Expected result: as the updated data are on node 1, a 1->2 sync should be
# started.
# Actual result: a 2->1 sync is started.
#
test_remote_sync() {
    # deploy the system and fill it with some data
    echo "Deploying and filling with some data..."
    destroy 2> /dev/null && sleep 5	# cleanup from previous tests
    set -e
    deploy
    umount_fs
    dd if=/dev/random of="/dev/hast/${RESOURCE}1" bs=1024 count=204800
    sleep 2
    echo "Done."

    # recreate node 2 to have it outdated
    ctl 2 role init
    ctl 2 create
    ctl 2 role secondary
    # If we write something to hast at this moment, the primary's local
    # count will be bumped; if we don't, it won't.
    # dd if=/dev/random of="/dev/hast/${RESOURCE}1" bs=1024 count=10

    # wait until node 1 connects to the secondary
    while ctl 1 status | grep -q 'dirty: 0 '; do
        echo "Waiting..."
        sleep 0.1
    done
    echo "Done."

    # make node 2 primary
    echo "Failing over..."
    ctl 1 role secondary
    ctl 2 role primary

    # wait until node 2 connects to the secondary
    while ctl 2 status | grep -q 'dirty: 0 '; do
        echo "Waiting..."
        sleep 0.1
    done
    echo "Done."
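
    # At this point node 2 (the new primary) has connected to node 1 and a
    # synchronization is running; per the scenario comment above, the
    # expected direction is 1->2 (node 1 holds the newer data), but the
    # observed behaviour is a 2->1 sync.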

    # emulate connection drop
    echo "Switching node 1 to init..."
    ctl 1 role init
    echo "Done."

    # try to read something from the hast storage
    echo "Reading..."
    dd if="/dev/hast/${RESOURCE}2" of=/dev/null bs=1024 count=204800 || :
    echo "Done."
}

# After test_trasz or test_remote_sync, sync the data between the nodes and
# switch back to node 1.
#
# Expected result: it works.
# Actual result: split-brain is detected.
#
split_brain () {
    ctl 1 role secondary

    # Wait until node 2 connects to the secondary.
    while ctl 2 status | grep -q 'dirty: 0 '; do
        echo "Waiting for connection ..."
        sleep 0.1
    done
    while ! ctl 2 status | grep -q 'dirty: 0 '; do
        echo "Waiting for sync to complete..."
        sleep 0.1
    done

    # Change roles.
    echo "Changing roles..."
    ctl 2 role secondary
    ctl 1 role primary
    echo "Done."
}

test_trasz_sb() {
    test_trasz
    split_brain
}

test_remote_sync_sb() {
    test_remote_sync
    split_brain
}

# dd(1) read/write performance depending on bs.
#
test_dd() {
    local bs count

    # deploy the system
    echo "Deploying and filling with some data..."
    destroy 2> /dev/null && sleep 5	# cleanup from previous tests
    set -e
    deploy
    umount_fs	# we don't need the FS for this test

    echo "Read test."
    (
        echo 512 131072
        echo 1024 65536
        echo 2048 32768
        echo 4096 16384
        echo 8192 8192
        echo 16384 4096
        echo 32768 2048
        echo 65536 1024
        echo 131072 512
    ) | while read bs count; do
        dd if="/dev/hast/${RESOURCE}1" of=/dev/null bs=${bs} count=${count}
    done

    echo "Write test."
    (
        echo 512 4096
        echo 1024 2048
        echo 2048 1024
        echo 4096 512
    ) | while read bs count; do
        dd if=/dev/random of="/dev/hast/${RESOURCE}1" bs=${bs} count=${count}
    done
}

#
# Test running rsync.
#
test_rsync() {
    echo "Deploying ..."
    destroy 2> /dev/null && sleep 5	# cleanup from previous tests
    set -e
    deploy

    echo "Running rsync on empty FS..."
    rsync -a /usr/src/sys "${MOUNT}/"

    echo "Removing some files"
    find "${MOUNT}/" -type f -name '*.c' | xargs rm -f

    echo "Running rsync to hast and tmp..."
    rsync -a /usr/src/sys "/tmp/" &
    rsync -a /usr/src/sys "${MOUNT}/"

#    echo "Removing all files"
#    rm -Rf "${MOUNT}/*"
#    rm -Rf "/tmp/sys"

    echo "Running rsync from tmp to hast and from /usr to tmp ..."
    rsync -a /usr/src/sys "/tmp/" &
    rsync -a /tmp/sys "${MOUNT}/"
}

usage() {
    echo "usage: $0 command [args]" >&2
    echo "" >&2
    echo "commands:" >&2
    echo "" >&2
    echo " deploy [hastd options] - create devices, start HAST, make and mount FS" >&2
    echo " start [hastd options]  - start HAST (part of deploy)" >&2
    echo " makefs                 - make FS on HAST (part of deploy)" >&2
    echo " mount                  - mount HAST device (part of deploy)" >&2
    echo " status                 - show current status" >&2
    echo " ctl <node> <command>   - run hastctl command for the node" >&2
    echo " umount                 - unmount HAST FS" >&2
    echo " reload                 - reload HAST configuration" >&2
    echo " stop                   - stop HAST" >&2
    echo " destroy                - unmount FS, stop HAST and destroy devices" >&2
    echo " test_*                 - run the given test function" >&2
    echo "" >&2
    exit 1
}

#
# Main
#

case $1 in
deploy)  shift; deploy $@ ;;
start)   shift; start $@ ;;
makefs)  make_fs ;;
mount)   mount_fs ;;
status)  status ;;
ctl)     test -n "$3" || usage
         shift; ctl $@ ;;
umount)  umount_fs ;;
reload)  reload ;;
stop)    stop ;;
destroy) destroy ;;
test_*)  $1 ;;
*)       usage ;;
esac
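
# Typical workflow (illustrative; "./hast.sh" stands for whatever name this
# script is saved under):
#
#     ./hast.sh deploy        # create md devices, start hastd, newfs, mount
#     ./hast.sh status
#     ./hast.sh ctl 2 status  # run a hastctl command against node 2
#     ./hast.sh test_sync     # or any other test_* function
#     ./hast.sh destroy       # unmount, stop hastd, destroy the md devices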