#!/bin/sh # # USAGE: vtblk.sh file [..fileN] # hit Ctrl-C to end # # Copyright (c) 2021, Juniper Networks, Inc. # usage() { cat <<__END_USAGE__ ${0##*/} [-d] [-h] [-l] [-o outfile [-a]] [-v] path [.. pathN] -a Append to output file (if -o flag is also used) -d Enable debug mode (dtrace is not run, only D script generated) -h Display usage information -l Check for leaked bio structures (requests without replies) -o outfile Log output to 'outfile' -v Verbose __END_USAGE__ exit 0 } Error() { echo ERROR: "$@" >&2 exit 1 } check_leaks=: cmdopts= debug=: dtrace="dtrace" no_append= no_debug= outfile= verbose=0 USER=${USER:-$(id -un)} DTRACE_TMPFILE=${DTRACE_TMPFILE:-/var/tmp/dtrace-vtblk.$USER.$$.d} while : do case "$1" in -a) no_append=: ;; -d) dtrace="echo dtrace"; debug=; no_debug=: ;; -h) usage ;; -l) check_leaks=; shift ;; -o) outfile="$2"; shift ;; -v) verbose=1 ;; --) shift; break ;; -*) Error "Unknown flag: $1" ;; *) break ;; esac shift done test $# -ge 1 || Error path or mount point required swap32() { local s="$1" local b1=${s%??????}; s=${s#??} local b2=${s%????}; s=${s#??} local b3=${s%??} local b4=${s#??} echo "$b4$b3$b2$b1" } get_gpt_dev() { local label # Make sure it is a GPT labelled device node case "$1" in /dev/gpt/*) label=${1#/dev/} ;; *) return ;; esac # Find the partition device that matches the label sysctl -nq kern.geom.conftxt | while read line do # Format: depth type name data IFS=" "; set -- $line : depth: $1, type: $2, name: $3 case "$2" in PART) part=$3 ;; LABEL) test "$3" == "$label" || continue echo "/dev/$part" break ;; esac done } get_mount_info() { local avail blk cap flags fstype gptdev mntdev mntpt used df "$1" 2>/dev/null | tail -1 | \ while read mntdev blk used avail cap mntpt; do # Get the flags from mount -v flags=$(mount -v | grep " on $mntpt (" | \ sed -e 's/.*(\(.*\))/\1/') set -f; IFS=,; set -- $flags; set +f # File system type is the first flag fstype="$1" # If mntdev is a GPT labelled device, try to determine the # actual device that it maps to gptdev=$(get_gpt_dev "$mntdev") # Process the other flags, looking for fsid for f in $@; do : f=$f case "${f## }" in "fsid "*) set -f; IFS=' '; set -- $f; set +f fsid="$2" break ;; esac done # Convert fsid into two 32-bit values, account endianess # The fsid is printed by mount by iterating over the fsid # byte-by-byte and printing each as a 2-digit hex value. # Due to this, little endian causes the values to be # swapped, so we need to do some work in order to use the # values in the DTrace script. local high=${fsid%????????} local low=${fsid#????????} case $(sysctl -qn hw.byteorder) in 4321) # Big endian (nothing to do) ;; 1234) # Little endian (swap bytes) high=$(swap32 "$high") low=$(swap32 "$low") ;; esac local fsidhi=$(printf "%d" "0x$high") local fsidlo=$(printf "%d" "0x$low") # Output the variables such that eval can be used cat <<__VARS__ fsidhi="$fsidhi" fsidlo="$fsidlo" fstype="$fstype" gptdev="$gptdev" mntdev="$mntdev" mntpt="$mntpt" __VARS__ break done } newline=' ' # Process arguments fsids= while test $# -gt 0 do # Reset the variables unset mntdev mntpt # Get the info of the mounted device for the specified path eval $(get_mount_info "$1") test -n "$mntdev" || Error "${1}: Missing or invalid" # Make sure file system type is "ufs" test "$fstype" == "ufs" || Error "${1}: Not on a ufs file system" # Make sure dev is a vtblk device case "$mntdev$gptdev" in */vtbd[0-9]*) ;; *) Error "${1}: Not a vtblk device mount" ;; esac # Add to the list of fsids that we will need to monitor case " $fsids " in *" ${fsidhi}u,${fsidlo}u:${mntdev} "*) ;; *) fsids="$fsids${fsids:+ }${fsidhi}u,${fsidlo}u:${mntdev}" devs="$devs${devs:+${newline}}$mntdev ($fsidhi,$fsidlo)" ;; esac # Shift this argument away shift done cat <<__END_OF_INFO__ Traced devices: $devs __END_OF_INFO__ # Clear the outfile if we are not appending $no_append test -s "$outfile" && rm -f "$outfile" # Build associative array of device fsid,inode number pairs arr=$(echo $fsids | sed 's,\([0-9,u]*\):\([^ ]*\),fsids[\1] = "\2";,g') # Start up the dtrace script cat >${DTRACE_TMPFILE} <<__END_OF_SCRIPT__ #pragma D option quiet const char *fsids[uint32_t,uint32_t]; inline int verbose = $verbose; /* bio_cmd */ inline int BIO_READ = 0x01; inline int BIO_WRITE = 0x02; inline int BIO_DELETE = 0x03; inline int BIO_GETATTR = 0x04; inline int BIO_FLUSH = 0x05; inline int BIO_CMD0 = 0x06; inline int BIO_CMD1 = 0x07; inline int BIO_CMD2 = 0x08; inline int BIO_ZONE = 0x09; /* bio_flags */ inline int BIO_ERROR = 0x01; inline int BIO_DONE = 0x02; inline int BIO_ONQUEUE = 0x04; inline int BIO_ORDERED = 0x08; inline int BIO_UNMAPPED = 0x10; inline int BIO_TRANSIENT_MAPPING = 0x20; inline int BIO_VLIST = 0x40; /* vbr_ack */ inline int VIRTIO_BLK_S_OK = 0; inline int VIRTIO_BLK_S_IOERR = 1; inline int VIRTIO_BLK_S_UNSUPP = 2; BEGIN { $arr printf("%Y Tracing vtblk requests... Hit Ctrl-C to end.\n", walltimestamp); } /* * Save the FSID for file system routines that may be called on vtblk devices * The FSID is compared against the 'fsids' associative array to determine * if the device is one we want to track. */ fbt::ufs_strategy:entry { this->vp = args[0]->a_vp; this->mp = this->vp->v_mount; /* Save the fsid */ self->fsidhi = (uint32_t)this->mp->mnt_stat.f_fsid.val[0]; self->fsidlo = (uint32_t)this->mp->mnt_stat.f_fsid.val[1]; } fbt::vn_fsync_buf:entry { this->vp = args[0]; this->mp = this->vp->v_mount; /* Save the fsid */ self->fsidhi = (uint32_t)this->mp->mnt_stat.f_fsid.val[0]; self->fsidlo = (uint32_t)this->mp->mnt_stat.f_fsid.val[1]; } fbt::ffs_sync:entry { this->mp = args[0]; /* Save the fsid */ self->fsidhi = (uint32_t)this->mp->mnt_stat.f_fsid.val[0]; self->fsidlo = (uint32_t)this->mp->mnt_stat.f_fsid.val[1]; } /* Check if this is the file access we want to monitor */ fbt::ffs_sync:entry, fbt::ufs_strategy:entry, fbt::vn_fsync_buf:entry /!fsids[self->fsidhi, self->fsidlo]/ { /* Let DTrace reclaim the space for the fsid */ self->fsidhi = 0; self->fsidlo = 0; } /* Free up space we used for tracking the FSID */ fbt::ffs_sync:return, fbt::ufs_strategy:return, fbt::vn_fsync_buf:return /self->fsidhi || self->fsidlo/ { /* Let DTrace reclaim the space for the fsid */ self->fsidhi = 0; self->fsidlo = 0; } /* Determine if this GEOM I/O request is one we need to track */ fbt::g_io_request:entry /self->fsidhi || self->fsidlo/ { /* Save bio */ self->bio = args[0]; /* Monitor this bio request? (Only for BIO_READ or BIO_WRITE) */ self->trace = (self->bio->bio_cmd == BIO_READ || self->bio->bio_cmd == BIO_WRITE); } /* If we do not need to trace this bio, reclaim the space we used */ fbt::g_io_request:entry /self->bio && !self->trace/ { /* Let DTrace reclaim the space for bio */ self->bio = 0; } /* Tracing this bio, do some record keeping */ fbt::g_io_request:return /self->trace/ { printf("%Y bio: %p (I/O request on %s [%u,%u])\n", walltimestamp, self->bio, stringof(fsids[self->fsidhi,self->fsidlo]), self->fsidhi, self->fsidlo); /* Mark this bio as one we want to track */ bios[self->bio] = 1; /* Let DTrace reclaim the space for bio and trace */ self->bio = 0; self->trace = 0; } /* * Follow bio through the clone operation * * XXX We may need additional probes to handle cases where the notification * function may change after the clone, otherwise we may leak some * bio tracking records. */ fbt::g_clone_bio:return /self->trace && args[0] == 0/ { printf("%Y bio clone %p -> %p\n", walltimestamp, self->bio, args[1]); /* Trace the cloned bio */ bios[args[1]] = 1; } /* I/O operation completed */ fbt::g_vfs_done:entry, fbt::g_std_done:entry /bios[args[0]]/ { this->bio = args[0]; this->len = this->bio->bio_bcount; printf("%Y bio: %p (I/O done)\n\t", walltimestamp, this->bio); printf("offset: %ld, length: %ld, flags:%s%s%s%s%s%s%s\n\t", this->bio->bio_offset, this->len, (this->bio->bio_flags & BIO_ERROR) != 0 ? " BIO_ERROR" : "", (this->bio->bio_flags & BIO_DONE) != 0 ? " BIO_DONE" : "", (this->bio->bio_flags & BIO_ONQUEUE) != 0 ? " BIO_ONQUEUE" : "", (this->bio->bio_flags & BIO_ORDERED) != 0 ? " BIO_ORDERED" : "", (this->bio->bio_flags & BIO_UNMAPPED) != 0 ? " BIO_UNMAPPED" : "", (this->bio->bio_flags & BIO_TRANSIENT_MAPPING) ? " BIO_TRANSIENT_MAPPING" : "", (this->bio->bio_flags & BIO_VLIST) != 0 ? " BIO_VLIST" : ""); printf("cmd: %s (%d), data: %p, error: %d\n", this->bio->bio_cmd == BIO_READ ? "BIO_READ" : this->bio->bio_cmd == BIO_WRITE ? "BIO_WRITE" : this->bio->bio_cmd == BIO_DELETE ? "BIO_DELETE" : this->bio->bio_cmd == BIO_GETATTR ? "BIO_GETATTR" : this->bio->bio_cmd == BIO_FLUSH ? "BIO_FLUSH" : this->bio->bio_cmd == BIO_CMD0 ? "BIO_CMD0" : this->bio->bio_cmd == BIO_CMD1 ? "BIO_CMD1" : this->bio->bio_cmd == BIO_CMD2 ? "BIO_CMD2" : this->bio->bio_cmd == BIO_ZONE ? "BIO_ZONE" : "", this->bio->bio_cmd, this->bio->bio_data, this->bio->bio_error); } /* Display the data contents if verbose mode and no errors encountered */ fbt::g_vfs_done:entry, fbt::g_std_done:entry /verbose && bios[args[0]] && args[0]->bio_data && (args[0]->bio_flags & BIO_ERROR) == 0 && (args[0]->bio_flags & BIO_UNMAPPED) == 0/ { this->bio = args[0]; this->len = this->bio->bio_bcount; /* Display up to 4 KiB of the data buffer */ tracemem(this->bio->bio_data, 4096, this->len); } /* Clean up tracking space for the bio */ fbt::g_destroy_bio:entry /bios[args[0]]/ { printf("%Y bio: %p (Destroying)\n", walltimestamp, args[0]); /* Let DTrace reclaim the space for this bio tracking entry */ bios[args[0]] = 0; } /* * Look for virtqueue enqueues on vtblk device virtqueue * * The only way we can tell if it is a vtblk device virtqueue is to compare * the 'vq_intrhand' member of the virtqueue structure with the interrupt * handler function used by vtblk device driver (vtblk_vq_intr). */ fbt::virtqueue_enqueue:entry /args[0]->vq_intrhand == (virtqueue_intr_t *)&virtio_blk.ko\`vtblk_vq_intr/ { self->vtblk_req = (struct vtblk_request *)args[1]; self->vbr_bp = self->vtblk_req->vbr_bp; } /* * Track sglist used in virtqueue enqueue request * * If this is vtblk device virtqueue, vbr_bp will be non-zero. Keep track * of the scatter/gather list in this case so we can map the virtqueue * indexes used with the bio request. */ fbt::virtqueue_enqueue:entry /self->vbr_bp && bios[self->vbr_bp]/ { self->sg = args[2]; /*printf("fbt::virtqueue_enqueue:entry: vbr_bp %p, sg %p\n", self->vbr_bp, self->sg);*/ } /* Reclaim the space used for tracking the virtqueue enqueue */ fbt::virtqueue_enqueue:return { /* Let DTrace reclaim the space used in entry */ self->vtblk_req = 0; self->vbr_bp = 0; self->sg = 0; } /* Track the segments that may be enqueued into the virtqueue */ virtqueue::enqueue_segments:entry /self->sg/ { self->vq = args[0]; self->head_idx = args[2]; self->readable = args[4]; self->writable = args[5]; /*printf("virtqueue::enqueue_segments:entry: vq %p, head_idx %u, readable %d, writable %d\n", self->vq, self->head_idx, self->readable, self->writable); */ } /* * Display the information about segments enqueued in the virtqueue. * * Reclaim the space used. */ virtqueue::enqueue_segments:return /self->sg/ { printf("%Y bio: %p (virtqueue request), vq: %p\n\t", walltimestamp, self->vbr_bp, self->vq); printf("sg: %p, start idx: %u, end idx: %u\n\t", self->sg, self->head_idx, args[0]); printf("readable: %d, writeable: %d\n", self->readable, self->writable); /* Let DTrace reclaim the space used in entry */ self->vq = 0; self->head_idx = 0; self->readable = 0; self->writable = 0; } /* Track the virtqueue reply via the softc structure */ fbt::vtblk_vq_intr:entry { self->reply_sc = args[0]; } /* Reclaim the space used by tracking the softc structure */ fbt::vtblk_vq_intr:return { self->reply_sc = 0; } /* Track the descriptor indexes used in the virtqueue reply */ fbt::virtqueue_dequeue:entry /self->reply_sc/ { self->reply_vq = args[0]; self->used_idx = self->reply_vq->vq_used_cons_idx & (self->reply_vq->vq_nentries - 1); this->uep = &self->reply_vq->vq_ring.used->ring[self->used_idx]; self->desc_idx = this->uep->id; } /* Save the vtblk request that corresponds with the virtqueue reply */ fbt::virtqueue_dequeue:return /self->reply_vq && args[1]/ { self->reply_req = (struct vtblk_request *)args[1]; } /* * Display information about the virtqueue reply, if it is for a bio that * we are tracking. */ fbt::virtqueue_dequeue:return /self->reply_req && bios[self->reply_req->vbr_bp]/ { printf("%Y bio: %p (virtqueue reply), vq: %p\n\t", walltimestamp, self->reply_req->vbr_bp, self->reply_vq); printf("used idx: %u, desc idx: %u, ack: %d, bio error: %s\n", self->used_idx, self->desc_idx, self->reply_req->vbr_ack, this->bio_error = self->reply_req->vbr_ack == VIRTIO_BLK_S_OK ? "0" : (self->reply_req->vbr_ack == VIRTIO_BLK_S_UNSUPP ? "ENOTSUP" : "EIO")); } /* Reclaim space used while tracking the virtqueue reply */ fbt::virtqueue_dequeue:return /self->reply_vq/ { /* Let DTrace reclaim the space used in entry */ self->reply_vq = 0; self->reply_req = 0; self->used_idx = 0; self->desc_idx = 0; } __END_OF_SCRIPT__ $no_debug trap "rm -f ${DTRACE_TMPFILE}" EXIT $debug echo Run the following command to run the script $no_debug echo Running dtrace script ${DTRACE_TMPFILE} ... ${dtrace} ${outfile:+-o "$outfile"} -s ${DTRACE_TMPFILE} || \ Error run with -d flag if you want to see the generated script