#! /bin/sh
# $AdminSVN$
# quick and dirty check for:
# any pool that's filling up
# anything we can't see a scrub for in a reasonable time window

# Run every command below with a UTC clock and a fixed, system-only
# search path.
export TZ=UTC
export PATH=/sbin:/bin:/usr/sbin:/usr/bin

# Nothing to check when the /dev/zfs node does not exist: leave quietly.
[ -e /dev/zfs ] || exit 0

# One complaint per day: once the "already complained" marker is more than
# a day old, remove it so that a new mail may be sent.
[ ! -f /var/db/zfscheck_complained ] ||
  find /var/db/zfscheck_complained -mtime +1d -exec rm -f {} \;

# Start this run without any complaints left over from the previous one.
rm -f /var/db/zfscheck_complaints

# zfscheck_pool_is_full CAP [THRESHOLD]
#   CAP        capacity as a whole number, with or without a trailing "%"
#   THRESHOLD  whole-number limit; defaults to 80
# Returns 0 when CAP is at or above THRESHOLD, 1 otherwise.  A CAP that is
# empty or not purely numeric (for example a "-" placeholder) counts as
# "not full" instead of being handed to test(1), which would only print an
# "integer expected" style error.
zfscheck_pool_is_full() {
  zfscheck_cap=${1%\%}
  case $zfscheck_cap in
    '' | *[!0-9]*) return 1 ;;
  esac
  [ "$zfscheck_cap" -ge "${2:-80}" ]
}

# Record a complaint for every pool that is at least 80% full.
# The complaints file is only touched when there is something to report;
# its mere existence is what later triggers the mail.
zpool list -H -o name,cap | while read -r pool cap
do
  # "85%" -> "85"
  cap=${cap%\%}
  if zfscheck_pool_is_full "$cap"; then
    echo "pool $pool is at $cap % full" >> /var/db/zfscheck_complaints
  fi
done
# zfscheck_scan_when SCANLINE
# Print the timestamp text carried by a pool's "scan:" status line when it
# describes a scrub that
#   - completed:    "scrub ... errors on <date>"
#   - is running:   "scrub in progress since <date>"
#   - was stopped:  "scrub canceled on <date>" ("cancelled" is accepted too)
# Prints nothing for any other line (e.g. "none requested").
zfscheck_scan_when() {
  # -E matters here: in basic regular expressions "(a|b)" is literal text,
  # so an alternation written that way can never match.
  printf '%s\n' "$1" | sed -E -n \
    -e 's/scrub (.* )?(in progress since|cancell?ed on) (.*)/\3/p' \
    -e 's/scrub .* errors on (.*)/\1/p'
}

# Complain about every pool whose most recent scrub activity (completion,
# start of a running scrub, or cancellation) is more than 60 days old or
# cannot be determined at all.
zpool list -H -o name | while read -r pool
do
  # Text following "  scan: " in the pool's status output.
  scan=$(zpool status "$pool" | sed -ne 's/^  scan: \(.*\)/\1/p')
  #echo $pool scan line $scan

  # 0 (the epoch) stands for "no scrub seen", which always exceeds the
  # 60 day limit below; this also covers "none requested".
  lastscrub=0
  when=$(zfscheck_scan_when "$scan")
  if [ -n "$when" ]; then
    #echo $pool last scrub activity $when
    lastscrub=$(date -j -f "%a %b %d %T %Y" "$when" "+%s")
  fi
  # If date(1) could not parse the text, fall back to 0 rather than let an
  # empty value turn the arithmetic below into a syntax error.
  case $lastscrub in
    '' | *[!0-9]*) lastscrub=0 ;;
  esac

  now=$(date +%s)
  days=$(( (now - lastscrub) / 3600 / 24 ))
  if [ "$days" -gt 60 ]; then
    echo "pool $pool has not been scanned for 60+ days: $scan" >> /var/db/zfscheck_complaints
  fi
done
# A complaint marker still exists, so a mail went out recently: stay quiet.
# Any complaints file written earlier in this run is left in place.
[ ! -f /var/db/zfscheck_complained ] || exit 0

# Mail this run's complaints (if any) in the background and drop the marker
# that suppresses further mails.
if [ -f /var/db/zfscheck_complaints ]; then
  subject="[clusteradm] ZFS problem on $(hostname | tr A-Z a-z)"
  mail -s "$subject" peter@freebsd.org < /var/db/zfscheck_complaints &
  touch /var/db/zfscheck_complained
fi
exit 0