GENERIC HEAD from 2010-01-19 11:42:15 UTC, r202614M, vmcore.44

KDB: debugger backends: ddb
KDB: current backend: ddb
Copyright (c) 1992-2010 The FreeBSD Project.
Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994
	The Regents of the University of California. All rights reserved.
FreeBSD is a registered trademark of The FreeBSD Foundation.
FreeBSD 9.0-CURRENT #0 r202614M: Tue Jan 19 13:59:33 CET 2010
    pho@crashbox.osted.lan:/usr/src/sys/i386/compile/PHO i386
WARNING: WITNESS option enabled, expect reduced performance.
WARNING: DIAGNOSTIC option enabled, expect reduced performance.
Timecounter "i8254" frequency 1193182 Hz quality 0
CPU: Intel(R) XEON(TM) CPU 1.80GHz (1799.80-MHz 686-class CPU)
  Origin = "GenuineIntel"  Id = 0xf24  Stepping = 4
  Features=0x3febfbff
real memory  = 1073741824 (1024 MB)
avail memory = 1031360512 (983 MB)
:
Trying to mount root from ufs:/dev/ad0s1a
WARNING: / was not properly dismounted
Enter full pathname of shell or RETURN for /bin/sh:
# fsck -y /
** /dev/ad0s1a
** Last Mounted on /
** Root file system
** Phase 1 - Check Blocks and Sizes
** Phase 2 - Check Pathnames
** Phase 3 - Check Connectivity
** Phase 4 - Check Reference Counts
** Phase 5 - Check Cyl groups
4408 files, 238909 used, 267578 free (602 frags, 33372 blocks, 0.1% fragmentation)

***** FILE SYSTEM MARKED CLEAN *****
# newfs -U /dev/ad0s1e > /dev/null
# tunefs -j enable /dev/ad0s1e
Using inode 4 in cg 0 for 33554432 byte journal
tunefs: soft updates journaling set
# mount /tmp
# umount /tmp
lock order reversal:
 1st 0xc44c046c ufs (ufs) @ kern/vfs_mount.c:1204
 2nd 0xc46c5b38 devfs (devfs) @ ufs/ffs/ffs_vfsops.c:1236
KDB: stack backtrace:
db_trace_self_wrapper(c0cabf9f,e69139c8,c08d8065,c08c8abb,c0caef50,...) at db_trace_self_wrapper+0x26
kdb_backtrace(c08c8abb,c0caef50,c413e1c0,c413e088,e6913a24,...) at kdb_backtrace+0x29
_witness_debugger(c0caef50,c46c5b38,c0c9d438,c413e088,c0cd1c0a,...) at _witness_debugger+0x25
witness_checkorder(c46c5b38,9,c0cd1c01,4d4,c46c5ba4,...) at witness_checkorder+0x839
__lockmgr_args(c46c5b38,80400,c46c5ba4,0,0,...) at __lockmgr_args+0x804
vop_stdlock(e6913b40,c0cd2acf,79,80400,c46c5ae0,...) at vop_stdlock+0x65
VOP_LOCK1_APV(c0d929a0,e6913b40,c46c515c,c0dd3fa0,c46c5ae0,...) at VOP_LOCK1_APV+0xb5
_vn_lock(c46c5ae0,80400,c0cd1c01,4d4,c46bbb00,...) at _vn_lock+0x78
ffs_flushfiles(c4764000,0,c4733900,e6913bc8,3,...) at ffs_flushfiles+0x11a
softdep_flushfiles(c4764000,0,c4733900,0,1,...) at softdep_flushfiles+0x2e
ffs_unmount(c4764000,8000000,c0cb58e4,4f9,80,...) at ffs_unmount+0x18f
dounmount(c4764000,8000000,c4733900,47e,2d9c374f,...) at dounmount+0x46d
unmount(c4733900,e6913cf8,8,c4733900,c0d96768,...) at unmount+0x2ff
syscall(e6913d38) at syscall+0x2b4
Xint0x80_syscall() at Xint0x80_syscall+0x20
--- syscall (22, FreeBSD ELF32, unmount), eip = 0x280dae8f, esp = 0xbfbfe64c, ebp = 0xbfbfe718 ---
# fsck -y
** /dev/ad0s1a
** Last Mounted on /
** Root file system
** Phase 1 - Check Blocks and Sizes
** Phase 2 - Check Pathnames
** Phase 3 - Check Connectivity
** Phase 4 - Check Reference Counts
** Phase 5 - Check Cyl groups
4408 files, 238909 used, 267578 free (602 frags, 33372 blocks, 0.1% fragmentation)

***** FILE SYSTEM IS CLEAN *****
** /dev/ad0s1f (NO WRITE)
** Last Mounted on /home
** Phase 1 - Check Blocks and Sizes
** Phase 2 - Check Pathnames
** Phase 3 - Check Connectivity
** Phase 4 - Check Reference Counts
** Phase 5 - Check Cyl groups
1962 files, 60070 used, 446417 free (513 frags, 55738 blocks, 0.1% fragmentation)
** /dev/ad0s1e
** Last Mounted on /tmp
** Phase 1 - Check Blocks and Sizes
** Phase 2 - Check Pathnames
** Phase 3 - Check Connectivity
** Phase 4 - Check Reference Counts
** Phase 5 - Check Cyl groups
3 files, 16394 used, 2012637 free (21 frags, 251577 blocks, 0.0% fragmentation)

***** FILE SYSTEM IS CLEAN *****
** /dev/ad0s1d
** Last Mounted on /usr
** Phase 1 - Check Blocks and Sizes
** Phase 2 - Check Pathnames
** Phase 3 - Check Connectivity
** Phase 4 - Check Reference Counts
** Phase 5 - Check Cyl groups
429322 files, 2188686 used, 2888393 free (50353 frags, 354755 blocks, 1.0% fragmentation)

***** FILE SYSTEM MARKED CLEAN *****
** /dev/ad0s1g
** Last Mounted on /var
** Phase 1 - Check Blocks and Sizes
** Phase 2 - Check Pathnames
** Phase 3 - Check Connectivity
** Phase 4 - Check Reference Counts
** Phase 5 - Check Cyl groups
394857 files, 7446128 used, 18445217 free (23449 frags, 2302721 blocks, 0.1% fragmentation)

***** FILE SYSTEM MARKED CLEAN *****
# umount /home
# fsck -y /home
** /dev/ad0s1f
** Last Mounted on /home
** Phase 1 - Check Blocks and Sizes
** Phase 2 - Check Pathnames
** Phase 3 - Check Connectivity
** Phase 4 - Check Reference Counts
** Phase 5 - Check Cyl groups
1962 files, 60070 used, 446417 free (513 frags, 55738 blocks, 0.1% fragmentation)

***** FILE SYSTEM MARKED CLEAN *****
# exit
Entropy harvesting: interrupts ethernet point_to_point kickstart.
Fast boot: skipping disk checks.
lock order reversal:
 1st 0xd81027c0 bufwait (bufwait) @ kern/vfs_bio.c:2581
 2nd 0xc46a5c00 dirhash (dirhash) @ ufs/ufs/ufs_dirhash.c:283
KDB: stack backtrace:
db_trace_self_wrapper(c0cabf9f,e699987c,c08d8065,c08c8abb,c0caef50,...) at db_trace_self_wrapper+0x26
kdb_backtrace(c08c8abb,c0caef50,c413af60,c413e228,e69998d8,...) at kdb_backtrace+0x29
_witness_debugger(c0caef50,c46a5c00,c0cd2952,c413e228,c0cd25e4,...) at _witness_debugger+0x25
witness_checkorder(c46a5c00,9,c0cd25db,11b,0,...) at witness_checkorder+0x839
_sx_xlock(c46a5c00,0,c0cd25db,11b,c479c910,...) at _sx_xlock+0x85
ufsdirhash_acquire(d8102760,e6999a1c,164,d87694ac,e69999a8,...) at ufsdirhash_acquire+0x48
ufsdirhash_add(c479c910,e6999a1c,4ac,e6999994,e6999998,...) at ufsdirhash_add+0x13
ufs_direnter(c482515c,c47a1d98,e6999a1c,e6999c00,d81051a0,...) at ufs_direnter+0x749
ufs_mkdir(e6999c28,c0ce8045,0,0,e6999b6c,...) at ufs_mkdir+0x993
VOP_MKDIR_APV(c0db95c0,e6999c28,e6999c00,e6999b6c,0,...) at VOP_MKDIR_APV+0xc5
kern_mkdirat(c4822240,ffffff9c,bfbfef5a,0,1ff,...) at kern_mkdirat+0x21b
kern_mkdir(c4822240,bfbfef5a,0,1ff,e6999d2c,...) at kern_mkdir+0x2e
mkdir(c4822240,e6999cf8,8,c0caf801,c0d973e0,...) at mkdir+0x29
syscall(e6999d38) at syscall+0x2b4
Xint0x80_syscall() at Xint0x80_syscall+0x20
--- syscall (136, FreeBSD ELF32, mkdir), eip = 0x2816b313, esp = 0xbfbfed6c, ebp = 0xbfbfee38 ---
fxp0: link state changed to UP
Starting Network: lo0 fxp0.
add net default: gateway 192.168.1.1
Additional ABI support: linux.
Starting mountd.
Configuring syscons: keymap blanktime.
Local package initialization:lock order reversal:
 1st 0xc47a01b4 ufs (ufs) @ kern/vfs_subr.c:2091
 2nd 0xd810f960 bufwait (bufwait) @ ufs/ffs/ffs_softdep.c:10915
 3rd 0xc48bac94 ufs (ufs) @ kern/vfs_subr.c:2091
KDB: stack backtrace:
db_trace_self_wrapper(c0cabf9f,e6a5f86c,c08d8065,c08c8abb,c0caef69,...) at db_trace_self_wrapper+0x26
kdb_backtrace(c08c8abb,c0caef69,c413af60,c413e1c0,e6a5f8c8,...) at kdb_backtrace+0x29
_witness_debugger(c0caef69,c48bac94,c0ca157a,c413e1c0,c0cb60ef,...) at _witness_debugger+0x25
witness_checkorder(c48bac94,9,c0cb60e6,82b,0,...) at witness_checkorder+0x839
__lockmgr_args(c48bac94,80100,c48bad00,0,0,...) at __lockmgr_args+0x804
ffs_lock(e6a5f9e8,c08d7e0b,c0cb55cd,80100,c48bac3c,...) at ffs_lock+0xa1
VOP_LOCK1_APV(c0db95c0,e6a5f9e8,109,c0dd3fa0,c48bac3c,...) at VOP_LOCK1_APV+0xb5
_vn_lock(c48bac3c,80100,c0cb60e6,82b,4,...) at _vn_lock+0x78
vget(c48bac3c,80100,c4a67480,50,0,...) at vget+0xbb
vfs_hash_get(c476387c,61c00,80000,c4a67480,e6a5fb38,...) at vfs_hash_get+0xed
ffs_vgetf(c476387c,61c00,80000,e6a5fb38,1,...) at ffs_vgetf+0x49
softdep_sync_metadata(c47a015c,0,c0cd215a,147,0,...) at softdep_sync_metadata+0x663
ffs_syncvnode(c47a015c,1,c4a67480,547,c0cb6b7d,...) at ffs_syncvnode+0x3e2
ffs_sync(c476387c,1,c0cb58e4,4f9,80,...) at ffs_sync+0x26f
dounmount(c476387c,8000000,c4a67480,47e,2d9c374f,...) at dounmount+0x44e
unmount(c4a67480,e6a5fcf8,8,c4a67480,c0d96768,...) at unmount+0x2ff
syscall(e6a5fd38) at syscall+0x2b4
Xint0x80_syscall() at Xint0x80_syscall+0x20
--- syscall (22, FreeBSD ELF32, unmount), eip = 0x280dae8f, esp = 0xbfbfe68c, ebp = 0xbfbfe758 ---
fsync: giving up on dirty
0xc46c5ae0: tag devfs, type VCHR
    usecount 1, writecount 0, refcount 16 mountedhere 0xc46bad00
    flags ()
    v_object 0xc471fbb0 ref 0 pages 50
     lock type devfs: EXCL by thread 0xc4a67480 (pid 985)
#0 0xc087abbe at __lockmgr_args+0xbfe
#1 0xc0916e15 at vop_stdlock+0x65
#2 0xc0bee2b5 at VOP_LOCK1_APV+0xb5
#3 0xc09347f8 at _vn_lock+0x78
#4 0xc0ad88c5 at ffs_sync+0x3d5
#5 0xc091f3ce at dounmount+0x44e
#6 0xc091f97f at unmount+0x2ff
#7 0xc0bd5464 at syscall+0x2b4
#8 0xc0bb7790 at Xint0x80_syscall+0x20
	dev ad0s1e
umount: unmount of /tmp failed: Resource temporarily unavailable
** /dev/ad0s1e (NO WRITE)
** Last Mounted on /tmp
** Phase 1 - Check Blocks and Sizes
** Phase 2 - Check Pathnames
** Phase 3 - Check Connectivity
** Phase 4 - Check Reference Counts
** Phase 5 - Check Cyl groups
7 files, 16398 used, 2012633 free (49 frags, 251573 blocks, 0.0% fragmentation)
mount: /dev/ad0s1e : Operation not permitted
usage: kill [-s signal_name] pid ...
       kill -l [exit_status]
       kill -signal_name pid ...
       kill -signal_number pid ...
fsck -y /tmp
watchdogd.
Tue Jan 19 15:54:37 CET 2010
Jan 19 15:54:55 crashbox su: pho to root on /dev/pts/0
Stopping inetd.
Stopping moused.
Waiting for PIDS: 1016.
Shutting down local packages:.
Stopping cron.
Stopping sshd.
Stopping ntpd.
Stopping nfsd.
Stopping rpcbind.
Stopping devd.
Writing entropy file:.
Terminated .
Jan 19 15:55:00 crashbox syslogd: exiting on signal 15
Enter full pathname of shell or RETURN for /bin/sh:
# umount /tmp
fsync: giving up on dirty
0xc46c5ae0: tag devfs, type VCHR
    usecount 1, writecount 0, refcount 16 mountedhere 0xc46bad00
    flags ()
    v_object 0xc471fbb0 ref 0 pages 50
     lock type devfs: EXCL by thread 0xc475fb40 (pid 1333)
#0 0xc087abbe at __lockmgr_args+0xbfe
#1 0xc0916e15 at vop_stdlock+0x65
#2 0xc0bee2b5 at VOP_LOCK1_APV+0xb5
#3 0xc09347f8 at _vn_lock+0x78
#4 0xc0ad88c5 at ffs_sync+0x3d5
#5 0xc091f3ce at dounmount+0x44e
#6 0xc091f97f at unmount+0x2ff
#7 0xc0bd5464 at syscall+0x2b4
#8 0xc0bb7790 at Xint0x80_syscall+0x20
	dev ad0s1e
umount: unmount of /tmp failed: Resource temporarily unavailable
# fstat /tmp
USER     CMD          PID   FD MOUNT      INUM MODE         SZ|DV R/W NAME
# mount
/dev/ad0s1a on / (ufs, local)
devfs on /dev (devfs, local, multilabel)
/dev/ad0s1f on /home (ufs, local)
/dev/ad0s1e on /tmp (ufs, NFS exported, local, union, soft-updates)
/dev/ad0s1d on /usr (ufs, local)
/dev/ad0s1g on /var (ufs, local)
# dumpfs /dev/ad0s1e | less
magic	19540119 (UFS2)	time	Tue Jan 19 15:54:35 2010
superblock location	65536	id	[ 4b55c6d0 2d9c374f ]
ncg	23	size	2097152	blocks	2029031
bsize	16384	shift	14	mask	0xffffc000
fsize	2048	shift	11	mask	0xfffff800
frag	8	shift	3	fsbtodb	2
minfree	8%	optim	time	symlinklen 120
maxbsize 16384	maxbpg	2048	maxcontig 8	contigsumsize 8
nbfree	251573	ndir	6	nifree	541687	nffree	49
bpg	11761	fpg	94088	ipg	23552	unrefs	0
nindir	2048	inopb	64	maxfilesize	140806241583103
sbsize	2048	cgsize	16384	csaddr	3000	cssize	2048
sblkno	40	cblkno	48	iblkno	56	dblkno	3000
cgrotor	0	fmod	0	ronly	0	clean	0
avgfpdir 64	avgfilesize 16384
flags	soft-updates unknown flags (0x200)
fsmnt	/tmp
volname		swuid	0
# ~KDB: enter: Line break on console
[thread pid 11 tid 100006 ]
Stopped at	kdb_enter+0x3a:	movl	$0,kdb_why
db> show mount
0xc46c0b50 /dev/ad0s1a on / (ufs)
0xc46c1000 devfs on /dev (devfs)
0xc4763b50 /dev/ad0s1f on /home (ufs)
0xc476387c /dev/ad0s1e on /tmp (ufs)
0xc47635a8 /dev/ad0s1d on /usr (ufs)
0xc47632d4 /dev/ad0s1g on /var (ufs)

More info: show mount <addr>
db> show mount 0xc476387c
0xc476387c /dev/ad0s1e on /tmp (ufs)
    mnt_flag = UNION, SOFTDEP, EXPORTED, DEFEXPORTED, LOCAL
    mnt_kern_flag = SOFTDEP, MPSAFE, LOOKUP_SHARED, 0x00000040
    mnt_opt = fstype, fspath, from, errmsg, rw, noro
    mnt_stat = { version=537068824 type=5 flags=0x0000000000201320
        bsize=2048 iosize=16384 blocks=2029031 bfree=2012633
        bavail=1850311 files=541694 ffree=541687 syncwrites=0
        asyncwrites=0 syncreads=0 asyncreads=0 namemax=255 owner=0
        fsid=[1263912656, 765212495] }
    mnt_cred = { uid=0 ruid=0 }
    mnt_ref = 8
    mnt_gen = 1
    mnt_nvnodelistsize = 8
    mnt_writeopcount = 0
    mnt_noasync = 1
    mnt_maxsymlinklen = 120
    mnt_iosize_max = 131072
    mnt_hashseed = 4211062285
    mnt_secondary_writes = 0
    mnt_secondary_accwrites = 21
    mnt_gjprovider = NULL

vnode 0xc47a02b8: tag ufs, type VREG
    usecount 0, writecount 0, refcount 1 mountedhere 0
    flags ()
     lock type ufs: UNLOCKED
#0 0xc087abbe at __lockmgr_args+0xbfe
#1 0xc0ad7a01 at ffs_vgetf+0x1e1
#2 0xc0ad7dce at ffs_vget+0x2e
#3 0xc0ac739b at softdep_mount+0xeb
#4 0xc0adb5f2 at ffs_mount+0x2452
#5 0xc09209c8 at vfs_donmount+0x1018
#6 0xc0922115 at nmount+0x75
#7 0xc0bd5464 at syscall+0x2b4
#8 0xc0bb7790 at Xint0x80_syscall+0x20
	ino 4, on dev ad0s1e

vnode 0xc47a015c: tag ufs, type VDIR
    usecount 0, writecount 0, refcount 2 mountedhere 0
    flags (VV_ROOT)
    v_object 0xc4823b28 ref 0 pages 1
     lock type ufs: UNLOCKED
#0 0xc087a552 at __lockmgr_args+0x592
#1 0xc0add191 at ffs_lock+0xa1
#2 0xc0bee2b5 at VOP_LOCK1_APV+0xb5
#3 0xc09347f8 at _vn_lock+0x78
#4 0xc09278fb at vget+0xbb
#5 0xc091a7bd at vfs_hash_get+0xed
#6 0xc0ad7869 at ffs_vgetf+0x49
#7 0xc0ad7dce at ffs_vget+0x2e
#8 0xc0ae9888 at ufs_root+0x28
#9 0xc091bd51 at lookup+0x9a1
#10 0xc091c86f at namei+0x57f
#11 0xc092bf72 at kern_statat_vnhook+0x72
#12 0xc092c0cc at kern_statat+0x3c
#13 0xc092c216 at kern_stat+0x36
#14 0xc092c2bf at stat+0x2f
#15 0xc0bd5464 at syscall+0x2b4
#16 0xc0bb7790 at Xint0x80_syscall+0x20
	ino 2, on dev ad0s1e

vnode 0xc48bb15c: tag ufs, type VDIR
    usecount 0, writecount 0, refcount 1 mountedhere 0
    flags ()
     lock type ufs: UNLOCKED
#0 0xc087a552 at __lockmgr_args+0x592
#1 0xc0add191 at ffs_lock+0xa1
#2 0xc0bee2b5 at VOP_LOCK1_APV+0xb5
#3 0xc09347f8 at _vn_lock+0x78
#4 0xc09278fb at vget+0xbb
#5 0xc091a7bd at vfs_hash_get+0xed
#6 0xc0ad7869 at ffs_vgetf+0x49
#7 0xc0ad7dce at ffs_vget+0x2e
#8 0xc0ae5411 at ufs_lookup_ino+0xaf1
#9 0xc0ae549a at ufs_lookup+0x2a
#10 0xc0beca85 at VOP_CACHEDLOOKUP_APV+0xc5
#11 0xc0914686 at vfs_cache_lookup+0xd6
#12 0xc0bef435 at VOP_LOOKUP_APV+0xe5
#13 0xc091ba2b at lookup+0x67b
#14 0xc091c86f at namei+0x57f
#15 0xc092bf72 at kern_statat_vnhook+0x72
#16 0xc092c0cc at kern_statat+0x3c
#17 0xc092c106 at kern_lstat+0x36
	ino 70656, on dev ad0s1e

vnode 0xc48bb000: tag ufs, type VDIR
    usecount 0, writecount 0, refcount 1 mountedhere 0
    flags ()
     lock type ufs: UNLOCKED
#0 0xc087a552 at __lockmgr_args+0x592
#1 0xc0add191 at ffs_lock+0xa1
#2 0xc0bee2b5 at VOP_LOCK1_APV+0xb5
#3 0xc09347f8 at _vn_lock+0x78
#4 0xc09278fb at vget+0xbb
#5 0xc091a7bd at vfs_hash_get+0xed
#6 0xc0ad7869 at ffs_vgetf+0x49
#7 0xc0ad7dce at ffs_vget+0x2e
#8 0xc0ae5411 at ufs_lookup_ino+0xaf1
#9 0xc0ae549a at ufs_lookup+0x2a
#10 0xc0beca85 at VOP_CACHEDLOOKUP_APV+0xc5
#11 0xc0914686 at vfs_cache_lookup+0xd6
#12 0xc0bef435 at VOP_LOOKUP_APV+0xe5
#13 0xc091ba2b at lookup+0x67b
#14 0xc091c86f at namei+0x57f
#15 0xc092bf72 at kern_statat_vnhook+0x72
#16 0xc092c0cc at kern_statat+0x3c
#17 0xc092c106 at kern_lstat+0x36
	ino 494592, on dev ad0s1e

vnode 0xc48bad98: tag ufs, type VDIR
    usecount 0, writecount 0, refcount 1 mountedhere 0
    flags ()
     lock type ufs: UNLOCKED
#0 0xc087a552 at __lockmgr_args+0x592
#1 0xc0add191 at ffs_lock+0xa1
#2 0xc0bee2b5 at VOP_LOCK1_APV+0xb5
#3 0xc09347f8 at _vn_lock+0x78
#4 0xc09278fb at vget+0xbb
#5 0xc091447b at cache_lookup+0x67b
#6 0xc091465d at vfs_cache_lookup+0xad
#7 0xc0bef435 at VOP_LOOKUP_APV+0xe5
#8 0xc091ba2b at lookup+0x67b
#9 0xc091c86f at namei+0x57f
#10 0xc092bdb6 at kern_pathconf+0x56
#11 0xc092beb1 at lpathconf+0x31
#12 0xc0bd5464 at syscall+0x2b4
#13 0xc0bb7790 at Xint0x80_syscall+0x20
	ino 23552, on dev ad0s1e

vnode 0xc48bac3c: tag ufs, type VDIR
    usecount 0, writecount 0, refcount 1 mountedhere 0
    flags ()
     lock type ufs: UNLOCKED
#0 0xc087a552 at __lockmgr_args+0x592
#1 0xc0add191 at ffs_lock+0xa1
#2 0xc0bee2b5 at VOP_LOCK1_APV+0xb5
#3 0xc09347f8 at _vn_lock+0x78
#4 0xc09278fb at vget+0xbb
#5 0xc091a7bd at vfs_hash_get+0xed
#6 0xc0ad7869 at ffs_vgetf+0x49
#7 0xc0ad7dce at ffs_vget+0x2e
#8 0xc0ae5411 at ufs_lookup_ino+0xaf1
#9 0xc0ae549a at ufs_lookup+0x2a
#10 0xc0beca85 at VOP_CACHEDLOOKUP_APV+0xc5
#11 0xc0914686 at vfs_cache_lookup+0xd6
#12 0xc0bef435 at VOP_LOOKUP_APV+0xe5
#13 0xc091ba2b at lookup+0x67b
#14 0xc091c86f at namei+0x57f
#15 0xc092bf72 at kern_statat_vnhook+0x72
#16 0xc092c0cc at kern_statat+0x3c
#17 0xc092c106 at kern_lstat+0x36
	ino 400384, on dev ad0s1e

vnode 0xc4a7cd98: tag ufs, type VDIR
    usecount 0, writecount 0, refcount 0 mountedhere 0
    flags (VI_FREE)
     lock type ufs: UNLOCKED
#0 0xc087abbe at __lockmgr_args+0xbfe
#1 0xc0ad7a01 at ffs_vgetf+0x1e1
#2 0xc0ad7dce at ffs_vget+0x2e
#3 0xc0ae5411 at ufs_lookup_ino+0xaf1
#4 0xc0ae549a at ufs_lookup+0x2a
#5 0xc0beca85 at VOP_CACHEDLOOKUP_APV+0xc5
#6 0xc0914686 at vfs_cache_lookup+0xd6
#7 0xc0bef435 at VOP_LOOKUP_APV+0xe5
#8 0xc091ba2b at lookup+0x67b
#9 0xc091c86f at namei+0x57f
#10 0xc092bf72 at kern_statat_vnhook+0x72
#11 0xc092c0cc at kern_statat+0x3c
#12 0xc092c106 at kern_lstat+0x36
#13 0xc092c1af at lstat+0x2f
#14 0xc0bd5464 at syscall+0x2b4
#15 0xc0bb7790 at Xint0x80_syscall+0x20
	ino 3, on dev ad0s1e

vnode 0xc4cc8d98: tag syncer, type VNON
    usecount 1, writecount 0, refcount 1 mountedhere 0
    flags ()
     lock type syncer: UNLOCKED
#0 0xc087abbe at __lockmgr_args+0xbfe
#1 0xc0916e15 at vop_stdlock+0x65
#2 0xc0bee2b5 at VOP_LOCK1_APV+0xb5
#3 0xc09347f8 at _vn_lock+0x78
#4 0xc09292a2 at sync_vnode+0x142
#5 0xc0929613 at sched_sync+0x273
#6 0xc0867d58 at fork_exit+0xb8
#7 0xc0bb77a0 at fork_trampoline+0x8
db> run pho
db:0:pho> bt
Tracing pid 11 tid 100006 td 0xc417f480
kdb_enter(c0c51169,c0c919a2,0,c438a380,0,...) at kdb_enter+0x3a
uart_intr(c438a300,c417f480,c415d8d0,c4183100,4,...) at uart_intr+0x126
intr_event_handle(c4183100,c3f0ac34,0,1f4,c4425400,...) at intr_event_handle+0x5c
intr_execute_handlers(c415d8d0,c3f0ac34,0,c3f0ac74,c0bb7af4,...) at intr_execute_handlers+0x49
lapic_handle_intr(38,c3f0ac34) at lapic_handle_intr+0x4c
Xapic_isr1() at Xapic_isr1+0x34
--- interrupt, eip = 0xc0babf15, esp = 0xc3f0ac74, ebp = 0xc3f0ac74 ---
acpi_cpu_c1(1,0,0,c08b76a1,c0e1b3e0,...) at acpi_cpu_c1+0x5
acpi_cpu_idle(c3f0acb4,c0bc36cb,1,c3f0acf8,c08b799e,...) at acpi_cpu_idle+0x11c
cpu_idle_acpi(1,c3f0acf8,c08b799e,1,c3f0acd4,...) at cpu_idle_acpi+0x1b
cpu_idle(1,c3f0acd4,c0caa6ae,3b0,c417f480,...) at cpu_idle+0x1b
sched_idletd(0,c3f0ad38,c0ca3ea7,343,c417daa0,...) at sched_idletd+0x23e
fork_exit(c08b7760,0,c3f0ad38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f0ad70, ebp = 0 ---
db:0:bt> show allpcpu
Current CPU: 0

cpuid        = 0
dynamic pcpu = 0x650980
curthread    = 0xc417f480: pid 11 "idle: cpu0"
curpcb       = 0xc3f0ad90
fpcurthread  = none
idlethread   = 0xc417f480: pid 11 "idle: cpu0"
APIC ID      = 0
currentldt   = 0x50
spin locks held:

cpuid        = 1
dynamic pcpu = 0x310c980
curthread    = 0xc417f6c0: pid 11 "idle: cpu1"
curpcb       = 0xc3f07d90
fpcurthread  = none
idlethread   = 0xc417f6c0: pid 11 "idle: cpu1"
APIC ID      = 1
currentldt   = 0x50
spin locks held:

cpuid        = 2
dynamic pcpu = 0x310f980
curthread    = 0xc417f900: pid 11 "idle: cpu2"
curpcb       = 0xc3f04d90
fpcurthread  = none
idlethread   = 0xc417f900: pid 11 "idle: cpu2"
APIC ID      = 6
currentldt   = 0x50
spin locks held:

cpuid        = 3
dynamic pcpu = 0x3112980
curthread    = 0xc417fb40: pid 11 "idle: cpu3"
curpcb       = 0xc3f01d90
fpcurthread  = none
idlethread   = 0xc417fb40: pid 11 "idle: cpu3"
APIC ID      = 7
currentldt   = 0x50
spin locks held:

db:0:allpcpu> show alllocks
db:0:alllocks> show lockedvnods
Locked vnodes
db:0:lockedvnods> show mount
0xc46c0b50 /dev/ad0s1a on / (ufs)
0xc46c1000 devfs on /dev (devfs)
0xc4763b50 /dev/ad0s1f on /home (ufs)
0xc476387c /dev/ad0s1e on /tmp (ufs)
0xc47635a8 /dev/ad0s1d on /usr (ufs)
0xc47632d4 /dev/ad0s1g on /var (ufs)

More info: show mount <addr>
db:0:mount> ps
  pid  ppid  pgrp   uid   state   wmesg    wchan      cmd
 1332     1  1332     0  Ss+     ttyin    0xc4290a70 sh
   19     0     0     0  DL      flowclea 0xc0f75888 [flowcleaner]
   18     0     0     0  DL      sdflush  0xc0f810e0 [softdepflush]
   17     0     0     0  DL      syncer   0xc0f75698 [syncer]
   16     0     0     0  DL      vlruwt   0xc465dd48 [vnlru]
   15     0     0     0  DL      psleep   0xc0f753c8 [bufdaemon]
    9     0     0     0  DL      pgzero   0xc0f81f14 [pagezero]
    8     0     0     0  DL      psleep   0xc0f81b44 [vmdaemon]
    7     0     0     0  DL      psleep   0xc0f81b0c [pagedaemon]
    6     0     0     0  DL      -        0xc429063c [fdc0]
   14     0     0     0  DL      (threaded)          [usb]
 100034                  D       -        0xc43a3dac [usbus0]
 100033                  D       -        0xc43a3d7c [usbus0]
 100032                  D       -        0xc43a3d4c [usbus0]
 100031                  D       -        0xc43a3d1c [usbus0]
    5     0     0     0  DL      ccb_scan 0xc0dd5354 [xpt_thrd]
   13     0     0     0  DL      -        0xc0e08fc4 [yarrow]
    4     0     0     0  DL      -        0xc0e06d64 [g_down]
    3     0     0     0  DL      -        0xc0e06d60 [g_up]
    2     0     0     0  DL      -        0xc0e06d58 [g_event]
   12     0     0     0  WL      (threaded)          [intr]
 100042                  I                           [irq7: ppc0]
 100040                  I                           [swi0: uart uart]
 100039                  I                           [irq12: psm0]
 100038                  I                           [irq1: atkbd0]
 100037                  I                           [irq15: ata1]
 100036                  I                           [irq14: ata0]
 100035                  I                           [irq17: fxp0]
 100030                  I                           [irq16: uhci0]
 100028                  I                           [irq9: acpi0]
 100024                  I                           [swi2: cambio]
 100022                  I                           [swi6: task queue]
 100021                  I                           [swi6: Giant taskq]
 100019                  I                           [swi5: +]
 100012                  I                           [swi1: netisr 0]
 100011                  I                           [swi3: vm]
 100010                  I                           [swi4: clock]
 100009                  I                           [swi4: clock]
 100008                  I                           [swi4: clock]
 100007                  I                           [swi4: clock]
   11     0     0     0  RL      (threaded)          [idle]
 100006                  Run     CPU 0              [idle: cpu0]
 100005                  Run     CPU 1              [idle: cpu1]
 100004                  Run     CPU 2              [idle: cpu2]
 100003                  Run     CPU 3              [idle: cpu3]
    1     0     1     0  SLs     wait     0xc417dd48 [init]
   10     0     0     0  DL      audit_wo 0xc0f80900 [audit]
    0     0     0     0  DLs     (threaded)          [kernel]
 100029                  D       -        0xc4387340 [em0 taskq]
 100027                  D       -        0xc4344100 [acpi_task_2]
 100026                  D       -        0xc4344100 [acpi_task_1]
 100025                  D       -        0xc4344100 [acpi_task_0]
 100020                  D       -        0xc4344380 [thread taskq]
 100018                  D       -        0xc4344600 [kqueue taskq]
 100016                  D       -        0xc4164e00 [firmware taskq]
 100000                  D       sched    0xc0e06e40 [swapper]
db:0:ps> allt
Tracing command sh pid 1332 tid 100127 td 0xc49846c0
sched_switch(c49846c0,0,104,191,be823364,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c49846c0,0,c0cad0c9,1a0,0,...) at sleepq_switch+0x15f
sleepq_catch_signals(c088233a,c4290a04,0,c0ca723a,c49846c0,...) at sleepq_catch_signals+0xb7
sleepq_wait_sig(c4290a70,0,e6a4bb0c,101,0,...) at sleepq_wait_sig+0x17
_cv_wait_sig(c4290a70,c4290a04,c0cb1070,511,0,...) at _cv_wait_sig+0x240
tty_wait(c4290a00,c4290a70,3ff,e6a4bb87,c0da5520,...) at tty_wait+0x71
ttydisc_read(c4290a00,e6a4bc58,0,9f,0,...) at ttydisc_read+0xef
ttydev_read(c416c800,e6a4bc58,0,0,3ff,...) at ttydev_read+0xaa
devfs_read_f(c4768968,e6a4bc58,c4183380,0,c49846c0,...) at devfs_read_f+0x7e
dofileread(e6a4bc58,ffffffff,ffffffff,0,c4768968,...) at dofileread+0x96
kern_readv(c49846c0,0,e6a4bc58,e6a4bc78,1,...) at kern_readv+0x58
read(c49846c0,e6a4bcf8,c,c0c9079c,c0d96554,...) at read+0x4f
syscall(e6a4bd38) at syscall+0x2b4
Xint0x80_syscall() at Xint0x80_syscall+0x20
--- syscall (3, FreeBSD ELF32, read), eip = 0x281ebee3, esp = 0xbfbfedbc, ebp = 0xbfbfedf8 ---

Tracing command flowcleaner pid 19 tid 100050 td 0xc436b6c0
sched_switch(c436b6c0,0,104,191,45996f6a,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c436b6c0,0,c0cad0c9,283,c436b6c0,...) at sleepq_switch+0x15f
sleepq_timedwait(c0f75888,0,e4703cc4,1,0,...) at sleepq_timedwait+0x6b
_cv_timedwait(c0f75888,c0f75890,2710,3f0,0,...) at _cv_timedwait+0x250
flowtable_cleaner(0,e4703d38,c0ca3ea7,343,c465d550,...) at flowtable_cleaner+0x1bf
fork_exit(c093e040,0,e4703d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe4703d70, ebp = 0 ---

Tracing command softdepflush pid 18 tid 100049 td 0xc436b900
sched_switch(c436b900,0,104,191,97981186,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,44,...) at mi_switch+0x200
sleepq_switch(c436b900,0,c0cad0c9,283,0,...) at sleepq_switch+0x15f
sleepq_timedwait(c0f810e0,44,c0cd1708,0,0,...) at sleepq_timedwait+0x6b
_sleep(c0f810e0,c0f81094,44,c0cd1708,3e8,...) at _sleep+0x339
softdep_flush(0,e4700d38,c0ca3ea7,343,c465d7f8,...) at softdep_flush+0x250
fork_exit(c0ad3ce0,0,e4700d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe4700d70, ebp = 0 ---

Tracing command syncer pid 17 tid 100048 td 0xc436bb40
sched_switch(c436bb40,0,104,191,98738a2a,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c436bb40,0,c0cad0c9,283,c436bb40,...) at sleepq_switch+0x15f
sleepq_timedwait(c0f75698,0,e46fdc88,1,0,...) at sleepq_timedwait+0x6b
_cv_timedwait(c0f75698,c0f75684,3e8,6d4,4e20,...) at _cv_timedwait+0x250
sched_sync(0,e46fdd38,c0ca3ea7,343,c465daa0,...) at sched_sync+0x502
fork_exit(c09293a0,0,e46fdd38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe46fdd70, ebp = 0 ---

Tracing command vnlru pid 16 tid 100047 td 0xc436bd80
sched_switch(c436bd80,0,104,191,845a6f46,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,50,...) at mi_switch+0x200
sleepq_switch(c436bd80,0,c0cad0c9,283,0,...) at sleepq_switch+0x15f
sleepq_timedwait(c465dd48,50,c0cb70c7,0,0,...) at sleepq_timedwait+0x6b
_sleep(c465dd48,c0f75658,250,c0cb70c7,3e8,...) at _sleep+0x339
vnlru_proc(0,e46fad38,c0ca3ea7,343,c465dd48,...) at vnlru_proc+0xe7
fork_exit(c0929f70,0,e46fad38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe46fad70, ebp = 0 ---

Tracing command bufdaemon pid 15 tid 100046 td 0xc441f000
sched_switch(c441f000,0,104,191,9bfddf12,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,44,...) at mi_switch+0x200
sleepq_switch(c441f000,0,c0cad0c9,283,0,...) at sleepq_switch+0x15f
sleepq_timedwait(c0f753c8,44,c0cb4624,0,0,...) at sleepq_timedwait+0x6b
_sleep(c0f753c8,c0f753d0,44,c0cb4624,3e8,...) at _sleep+0x339
buf_daemon(0,e46f7d38,c0ca3ea7,343,c417e2a8,...) at buf_daemon+0x16e
fork_exit(c0911000,0,e46f7d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe46f7d70, ebp = 0 ---

Tracing command pagezero pid 9 tid 100045 td 0xc441f240
sched_switch(c441f240,0,104,191,ed7503f0,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c441f240,0,c0cad0c9,283,0,...) at sleepq_switch+0x15f
sleepq_timedwait(c0f81f14,0,c0cd7570,0,0,...) at sleepq_timedwait+0x6b
_sleep(c0f81f14,c0f81a00,0,c0cd7570,493e0,...) at _sleep+0x339
vm_pagezero(0,e46f4d38,c0ca3ea7,343,c417e550,...) at vm_pagezero+0xdc
fork_exit(c0b13700,0,e46f4d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe46f4d70, ebp = 0 ---

Tracing command vmdaemon pid 8 tid 100044 td 0xc441f480
sched_switch(c441f480,0,104,191,360d9454,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,68,...) at mi_switch+0x200
sleepq_switch(c441f480,0,c0cad0c9,260,0,...) at sleepq_switch+0x15f
sleepq_wait(c0f81b44,68,c0cb4624,0,0,...) at sleepq_wait+0x63
_sleep(c0f81b44,c0f81b48,68,c0cb4624,0,...) at _sleep+0x36b
vm_daemon(0,e46f1d38,c0ca3ea7,343,c417e7f8,...) at vm_daemon+0x59
fork_exit(c0b0db90,0,e46f1d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe46f1d70, ebp = 0 ---

Tracing command pagedaemon pid 7 tid 100043 td 0xc441f6c0
sched_switch(c441f6c0,0,104,191,5cd6e4fa,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,44,...) at mi_switch+0x200
sleepq_switch(c441f6c0,0,c0cad0c9,283,0,...) at sleepq_switch+0x15f
sleepq_timedwait(c0f81b0c,44,c0cb4624,0,0,...) at sleepq_timedwait+0x6b
_sleep(c0f81b0c,c0f81a00,44,c0cb4624,1388,...) at _sleep+0x339
vm_pageout(0,e46eed38,c0ca3ea7,343,c417eaa0,...) at vm_pageout+0x2bb
fork_exit(c0b0ea30,0,e46eed38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe46eed70, ebp = 0 ---

Tracing command fdc0 pid 6 tid 100041 td 0xc441fb40
sched_switch(c441fb40,0,104,191,84ff3bf0,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,4c,...) at mi_switch+0x200
sleepq_switch(c441fb40,0,c0cad0c9,283,0,...) at sleepq_switch+0x15f
sleepq_timedwait(c429063c,4c,c0c9e2e9,0,0,...) at sleepq_timedwait+0x6b
_sleep(c429063c,c42906f0,4c,c0c9e2e9,3e8,...) at _sleep+0x339
fdc_thread(c4290600,e46e8d38,c0ca3ea7,343,c417ed48,...) at fdc_thread+0x27d
fork_exit(c0b90d40,c4290600,e46e8d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe46e8d70, ebp = 0 ---

Tracing command usb pid 14 tid 100034 td 0xc43696c0
sched_switch(c43696c0,0,104,191,b870d188,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c43696c0,0,c0cad0c9,260,c43696c0,...) at sleepq_switch+0x15f
sleepq_wait(c43a3dac,0,c3f88cbc,1,0,...) at sleepq_wait+0x63
_cv_wait(c43a3dac,c43a3e4c,c0c96fd9,6c,c43a3db4,...) at _cv_wait+0x240
usb_process(c43a3da4,c3f88d38,c0ca3ea7,343,c4346000,...) at usb_process+0x193
fork_exit(c07c2850,c43a3da4,c3f88d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f88d70, ebp = 0 ---

Tracing command usb pid 14 tid 100033 td 0xc4369900
sched_switch(c4369900,0,104,191,99f2d9b2,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c4369900,0,c0cad0c9,260,c4369900,...) at sleepq_switch+0x15f
sleepq_wait(c43a3d7c,0,c3f85cbc,1,0,...) at sleepq_wait+0x63
_cv_wait(c43a3d7c,c43a3e4c,c0c96fd9,6c,c43a3d84,...) at _cv_wait+0x240
usb_process(c43a3d74,c3f85d38,c0ca3ea7,343,c4346000,...) at usb_process+0x193
fork_exit(c07c2850,c43a3d74,c3f85d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f85d70, ebp = 0 ---

Tracing command usb pid 14 tid 100032 td 0xc4369b40
sched_switch(c4369b40,0,104,191,b7fb48c4,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c4369b40,0,c0cad0c9,260,c4369b40,...) at sleepq_switch+0x15f
sleepq_wait(c43a3d4c,0,c3f82cbc,1,0,...) at sleepq_wait+0x63
_cv_wait(c43a3d4c,c43a3e4c,c0c96fd9,6c,c43a3d54,...) at _cv_wait+0x240
usb_process(c43a3d44,c3f82d38,c0ca3ea7,343,c4346000,...) at usb_process+0x193
fork_exit(c07c2850,c43a3d44,c3f82d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f82d70, ebp = 0 ---

Tracing command usb pid 14 tid 100031 td 0xc4369d80
sched_switch(c4369d80,0,104,191,b7fb1a0c,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c4369d80,0,c0cad0c9,260,c4369d80,...) at sleepq_switch+0x15f
sleepq_wait(c43a3d1c,0,c3f7fcbc,1,0,...) at sleepq_wait+0x63
_cv_wait(c43a3d1c,c43a3e4c,c0c96fd9,6c,c43a3d24,...) at _cv_wait+0x240
usb_process(c43a3d14,c3f7fd38,c0ca3ea7,343,c4346000,...) at usb_process+0x193
fork_exit(c07c2850,c43a3d14,c3f7fd38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f7fd70, ebp = 0 ---

Tracing command xpt_thrd pid 5 tid 100023 td 0xc4359000
sched_switch(c4359000,0,104,191,b7fad8ec,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,4c,...) at mi_switch+0x200
sleepq_switch(c4359000,0,c0cad0c9,260,0,...) at sleepq_switch+0x15f
sleepq_wait(c0dd5354,4c,c0c4295a,0,0,...) at sleepq_wait+0x63
_sleep(c0dd5354,c0dd536c,4c,c0c4295a,0,...) at _sleep+0x36b
xpt_scanner_thread(0,c3f40d38,c0ca3ea7,343,c43462a8,...) at xpt_scanner_thread+0x4a
fork_exit(c0484d30,0,c3f40d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f40d70, ebp = 0 ---

Tracing command yarrow pid 13 tid 100017 td 0xc4181240
sched_switch(c4181240,0,104,191,d0693dbc,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c4181240,0,c0cad0c9,283,2,...) at sleepq_switch+0x15f
sleepq_timedwait(c0e08fc4,0,c0c9e2e9,2,0,...) at sleepq_timedwait+0x6b
_sleep(c0e08fc4,0,0,c0c9e2e9,64,...) at _sleep+0x339
pause(c0c9e2e9,64,c0c8acd4,111,0,...) at pause+0x47
random_kthread(0,c3f2ed38,c0ca3ea7,343,c4346550,...) at random_kthread+0x1ef
fork_exit(c0739360,0,c3f2ed38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f2ed70, ebp = 0 ---

Tracing command g_down pid 4 tid 100015 td 0xc41816c0
sched_switch(c41816c0,0,104,191,4fdd34d2,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,4c,...) at mi_switch+0x200
sleepq_switch(c41816c0,0,c0cad0c9,260,0,...) at sleepq_switch+0x15f
sleepq_wait(c0e06d64,4c,c0c9e2e9,0,0,...) at sleepq_wait+0x63
_sleep(c0e06d64,c0e06cc8,24c,c0c9e2e9,0,...) at _sleep+0x36b
g_io_schedule_down(c41816c0,0,c0c9f9f5,74,0,...) at g_io_schedule_down+0x56
g_down_procbody(0,c3f28d38,c0ca3ea7,343,c417d000,...) at g_down_procbody+0x8d
fork_exit(c082eda0,0,c3f28d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f28d70, ebp = 0 ---

Tracing command g_up pid 3 tid 100014 td 0xc4181900
sched_switch(c4181900,0,104,191,4fe735fa,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,4c,...) at mi_switch+0x200
sleepq_switch(c4181900,0,c0cad0c9,260,0,...) at sleepq_switch+0x15f
sleepq_wait(c0e06d60,4c,c0c9e2e9,0,0,...) at sleepq_wait+0x63
_sleep(c0e06d60,c0e06ce8,24c,c0c9e2e9,0,...) at _sleep+0x36b
g_io_schedule_up(c4181900,0,c0c9f9f5,5d,0,...) at g_io_schedule_up+0x11e
g_up_procbody(0,c3f25d38,c0ca3ea7,343,c417d2a8,...) at g_up_procbody+0x8d
fork_exit(c082ee30,0,c3f25d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f25d70, ebp = 0 ---

Tracing command g_event pid 2 tid 100013 td 0xc4181b40
sched_switch(c4181b40,0,104,191,d1607c42,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,4c,...) at mi_switch+0x200
sleepq_switch(c4181b40,0,c0cad0c9,283,0,...) at sleepq_switch+0x15f
sleepq_timedwait(c0e06d58,4c,c0c9e2e9,0,0,...) at sleepq_timedwait+0x6b
_sleep(c0e06d58,0,4c,c0c9e2e9,64,...) at _sleep+0x339
g_event_procbody(0,c3f22d38,c0ca3ea7,343,c417d550,...) at g_event_procbody+0xcb
fork_exit(c082eec0,0,c3f22d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f22d70, ebp = 0 ---

Tracing command intr pid 12 tid 100042 td 0xc441f900
fork_trampoline() at fork_trampoline

Tracing command intr pid 12 tid 100040 td 0xc441fd80
sched_switch(c441fd80,0,109,191,be820fa0,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c44163f0,...) at mi_switch+0x200
ithread_loop(c441ab50,e46dbd38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c441ab50,e46dbd38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe46dbd70, ebp = 0 ---

Tracing command intr pid 12 tid 100039 td 0xc4420000
fork_trampoline() at fork_trampoline

Tracing command intr pid 12 tid 100038 td 0xc4359d80
sched_switch(c4359d80,0,109,191,b783f47c,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c41832f0,...) at mi_switch+0x200
ithread_loop(c441a000,e46d5d38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c441a000,e46d5d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe46d5d70, ebp = 0 ---

Tracing command intr pid 12 tid 100037 td 0xc4369000
fork_trampoline() at fork_trampoline

Tracing command intr pid 12 tid 100036 td 0xc4369240
sched_switch(c4369240,0,109,191,4fe49fbe,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c4182b70,...) at mi_switch+0x200
ithread_loop(c4410830,e46ccd38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c4410830,e46ccd38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe46ccd70, ebp = 0 ---

Tracing command intr pid 12 tid 100035 td 0xc4369480
sched_switch(c4369480,0,109,191,e157248,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c41829f0,...) at mi_switch+0x200
ithread_loop(c43b4b20,e46c6d38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c43b4b20,e46c6d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xe46c6d70, ebp = 0 ---

Tracing command intr pid 12 tid 100030 td 0xc436b000
sched_switch(c436b000,0,109,191,b755bf34,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c4182a70,...) at mi_switch+0x200
ithread_loop(c4389840,c3f7cd38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c4389840,c3f7cd38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f7cd70, ebp = 0 ---

Tracing command intr pid 12 tid 100028 td 0xc436b480
fork_trampoline() at fork_trampoline

Tracing command intr pid 12 tid 100024 td 0xc4206d80
fork_trampoline() at fork_trampoline

Tracing command intr pid 12 tid 100022 td 0xc4359240
sched_switch(c4359240,0,109,191,4308273e,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c41c5bf0,...) at mi_switch+0x200
ithread_loop(c4119950,c3f3dd38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c4119950,c3f3dd38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f3dd70, ebp = 0 ---

Tracing command intr pid 12 tid 100021 td 0xc4359480
sched_switch(c4359480,0,109,191,5526675e,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c41c5c70,...) at mi_switch+0x200
ithread_loop(c4119960,c3f3ad38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c4119960,c3f3ad38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f3ad70, ebp = 0 ---

Tracing command intr pid 12 tid 100019 td 0xc4359900
fork_trampoline() at fork_trampoline

Tracing command intr pid 12 tid 100012 td 0xc4181d80
sched_switch(c4181d80,0,109,191,5566aaf8,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c4207d70,...) at mi_switch+0x200
ithread_loop(c417c0a0,c3f1fd38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c417c0a0,c3f1fd38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f1fd70, ebp = 0 ---

Tracing command intr pid 12 tid 100011 td 0xc4206000
fork_trampoline() at fork_trampoline

Tracing command intr pid 12 tid 100010 td 0xc4206240
sched_switch(c4206240,0,109,191,4598496c,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c4207e70,...) at mi_switch+0x200
ithread_loop(c417c0c0,c3f19d38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c417c0c0,c3f19d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f19d70, ebp = 0 ---

Tracing command intr pid 12 tid 100009 td 0xc4206480
sched_switch(c4206480,0,109,191,9bfd9bd8,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c41836f0,...) at mi_switch+0x200
ithread_loop(c417c0d0,c3f16d38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c417c0d0,c3f16d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f16d70, ebp = 0 ---

Tracing command intr pid 12 tid 100008 td 0xc417f000
sched_switch(c417f000,0,109,191,d160413a,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c4183770,...) at mi_switch+0x200
ithread_loop(c417c0e0,c3f13d38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c417c0e0,c3f13d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f13d70, ebp = 0 ---

Tracing command intr pid 12 tid 100007 td 0xc417f240
sched_switch(c417f240,0,109,191,d2281fca,...) at sched_switch+0x406
mi_switch(109,0,c0ca4126,52d,c41837f0,...) at mi_switch+0x200
ithread_loop(c417c0f0,c3f10d38,c0ca3ea7,343,c417d7f8,...) at ithread_loop+0x1f6
fork_exit(c086ad40,c417c0f0,c3f10d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f10d70, ebp = 0 ---

Tracing command idle pid 11 tid 100006 td 0xc417f480
kdb_enter(c0c51169,c0c919a2,0,c438a380,0,...) at kdb_enter+0x3a
uart_intr(c438a300,c417f480,c415d8d0,c4183100,4,...) at uart_intr+0x126
intr_event_handle(c4183100,c3f0ac34,0,1f4,c4425400,...) at intr_event_handle+0x5c
intr_execute_handlers(c415d8d0,c3f0ac34,0,c3f0ac74,c0bb7af4,...) at intr_execute_handlers+0x49
lapic_handle_intr(38,c3f0ac34) at lapic_handle_intr+0x4c
Xapic_isr1() at Xapic_isr1+0x34
--- interrupt, eip = 0xc0babf15, esp = 0xc3f0ac74, ebp = 0xc3f0ac74 ---
acpi_cpu_c1(1,0,0,c08b76a1,c0e1b3e0,...) at acpi_cpu_c1+0x5
acpi_cpu_idle(c3f0acb4,c0bc36cb,1,c3f0acf8,c08b799e,...) at acpi_cpu_idle+0x11c
cpu_idle_acpi(1,c3f0acf8,c08b799e,1,c3f0acd4,...) at cpu_idle_acpi+0x1b
cpu_idle(1,c3f0acd4,c0caa6ae,3b0,c417f480,...) at cpu_idle+0x1b
sched_idletd(0,c3f0ad38,c0ca3ea7,343,c417daa0,...) at sched_idletd+0x23e
fork_exit(c08b7760,0,c3f0ad38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f0ad70, ebp = 0 ---

Tracing command idle pid 11 tid 100005 td 0xc417f6c0
cpustop_handler(2,c3f07c28,c0bd56e6,c0e091dc,c3f07bbc,...) at cpustop_handler+0x32
ipi_nmi_handler(c0e091dc,c3f07bbc,c0881f84,c0e091dc,c417daa0,...) at ipi_nmi_handler+0x2f
trap(c3f07c34) at trap+0x36
calltrap() at calltrap+0x6
--- trap 0x13, eip = 0xc0babf15, esp = 0xc3f07c74, ebp = 0xc3f07c74 ---
acpi_cpu_c1(1,0,1,c08b76a1,c0e1b3e0,...) at acpi_cpu_c1+0x5
acpi_cpu_idle(c3f07cb4,c0bc36cb,0,c3f07cf8,c08b799e,...) at acpi_cpu_idle+0x11c
cpu_idle_acpi(0,c3f07cf8,c08b799e,0,c3f07cd4,...) at cpu_idle_acpi+0x1b
cpu_idle(0,c3f07cd4,c0caa6ae,3b0,c417f6c0,...) at cpu_idle+0x1b
sched_idletd(0,c3f07d38,c0ca3ea7,343,c417daa0,...) at sched_idletd+0x23e
fork_exit(c08b7760,0,c3f07d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f07d70, ebp = 0 ---

Tracing command idle pid 11 tid 100004 td 0xc417f900
cpustop_handler(4,c3f04c28,c0bd56e6,c0e09218,c3f04bbc,...) at cpustop_handler+0x32
ipi_nmi_handler(c0e09218,c3f04bbc,c0881f84,c0e09218,c417daa0,...) at ipi_nmi_handler+0x2f
trap(c3f04c34) at trap+0x36
calltrap() at calltrap+0x6
--- trap 0x13, eip = 0xc0babf15, esp = 0xc3f04c74, ebp = 0xc3f04c74 ---
acpi_cpu_c1(1,0,2,c08b76a1,c0e1b3e0,...) at acpi_cpu_c1+0x5
acpi_cpu_idle(c3f04cb4,c0bc36cb,0,c3f04cf8,c08b799e,...) at acpi_cpu_idle+0x11c
cpu_idle_acpi(0,c3f04cf8,c08b799e,0,c3f04cd4,...) at cpu_idle_acpi+0x1b
cpu_idle(0,c3f04cd4,c0caa6ae,a09,c417f900,...) at cpu_idle+0x1b
sched_idletd(0,c3f04d38,c0ca3ea7,343,c417daa0,...) at sched_idletd+0x23e
fork_exit(c08b7760,0,c3f04d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f04d70, ebp = 0 ---

Tracing command idle pid 11 tid 100003 td 0xc417fb40
cpustop_handler(8,c3f01c28,c0bd56e6,c0e09254,c3f01bbc,...) at cpustop_handler+0x32
ipi_nmi_handler(c0e09254,c3f01bbc,c0881f84,c0e09254,c417daa0,...) at ipi_nmi_handler+0x2f
trap(c3f01c34) at trap+0x36
calltrap() at calltrap+0x6
--- trap 0x13, eip = 0xc0babf15, esp = 0xc3f01c74, ebp = 0xc3f01c74 ---
acpi_cpu_c1(1,0,3,c08b76a1,c0e1b3e0,...) at acpi_cpu_c1+0x5
acpi_cpu_idle(c3f01cb4,c0bc36cb,0,c3f01cf8,c08b799e,...) at acpi_cpu_idle+0x11c
cpu_idle_acpi(0,c3f01cf8,c08b799e,0,c3f01cd4,...) at cpu_idle_acpi+0x1b
cpu_idle(0,c3f01cd4,c0caa6ae,3b0,c417fb40,...) at cpu_idle+0x1b
sched_idletd(0,c3f01d38,c0ca3ea7,343,c417daa0,...) at sched_idletd+0x23e
fork_exit(c08b7760,0,c3f01d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f01d70, ebp = 0 ---

Tracing command init pid 1 tid 100002 td 0xc417fd80
sched_switch(c417fd80,0,104,191,99ce52a4,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,5c,...) at mi_switch+0x200
sleepq_switch(c417fd80,0,c0cad0c9,1a0,5c,...) at sleepq_switch+0x15f
sleepq_catch_signals(c0cad0c9,160,0,100,100,...) at sleepq_catch_signals+0xb7
sleepq_wait_sig(c417dd48,5c,c0caf8d6,100,0,...) at sleepq_wait_sig+0x17
_sleep(c417dd48,c417ddd0,15c,c0caf8d6,0,...) at _sleep+0x354
kern_wait(c417fd80,ffffffff,c3efdc74,2,0,...) at kern_wait+0xb76
wait4(c417fd80,c3efdcf8,10,c417fd80,c0d965c4,...) at wait4+0x3b
syscall(c3efdd38) at syscall+0x2b4
Xint0x80_syscall() at Xint0x80_syscall+0x20
--- syscall (7, FreeBSD ELF32, wait4), eip = 0x8054eaf, esp = 0xbfbfe86c, ebp = 0xbfbfe888 ---

Tracing command audit pid 10 tid 100001 td 0xc4181000
sched_switch(c4181000,0,104,191,b7f53114,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c4181000,0,c0cad0c9,260,c4181000,...) at sleepq_switch+0x15f
sleepq_wait(c0f80900,0,c3efac9c,1,0,...) at sleepq_wait+0x63
_cv_wait(c0f80900,c0f808e4,c0ccd278,194,0,...) at _cv_wait+0x240
audit_worker(0,c3efad38,c0ca3ea7,343,c417e000,...) at audit_worker+0x84
fork_exit(c0a9aa90,0,c3efad38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3efad70, ebp = 0 ---

Tracing command kernel pid 0 tid 100029 td 0xc436b240
sched_switch(c436b240,0,104,191,b7552778,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c436b240,0,c0cad0c9,260,c436b240,...) at sleepq_switch+0x15f
sleepq_wait(c4387340,0,c0ca96a7,c0c9e2e9,0,...) at sleepq_wait+0x63
msleep_spin(c4387340,c4387358,c0c9e2e9,0,c0ca723a,...) at msleep_spin+0x21d
taskqueue_thread_loop(c438f5a0,c3f78d38,c0ca3ea7,343,c0e06e40,...) at taskqueue_thread_loop+0x94
fork_exit(c08d1150,c438f5a0,c3f78d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f78d70, ebp = 0 ---

Tracing command kernel pid 0 tid 100027 td 0xc42066c0
sched_switch(c42066c0,0,104,191,1c8e6ae0,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c42066c0,0,c0cad0c9,260,c42066c0,...) at sleepq_switch+0x15f
sleepq_wait(c4344100,0,c0ca96a7,c0c9e2e9,0,...) at sleepq_wait+0x63
msleep_spin(c4344100,c4344118,c0c9e2e9,0,c0ca723a,...) at msleep_spin+0x21d
taskqueue_thread_loop(c0dd81a0,c3f4cd38,c0ca3ea7,343,c0e06e40,...) at taskqueue_thread_loop+0x94
fork_exit(c08d1150,c0dd81a0,c3f4cd38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f4cd70, ebp = 0 ---

Tracing command kernel pid 0 tid 100026 td 0xc4206900
sched_switch(c4206900,0,104,191,1c8e4c44,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c4206900,0,c0cad0c9,260,c4206900,...) at sleepq_switch+0x15f
sleepq_wait(c4344100,0,c0ca96a7,c0c9e2e9,0,...) at sleepq_wait+0x63
msleep_spin(c4344100,c4344118,c0c9e2e9,0,c0ca723a,...) at msleep_spin+0x21d
taskqueue_thread_loop(c0dd81a0,c3f49d38,c0ca3ea7,343,c0e06e40,...) at taskqueue_thread_loop+0x94
fork_exit(c08d1150,c0dd81a0,c3f49d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f49d70, ebp = 0 ---

Tracing command kernel pid 0 tid 100025 td 0xc4206b40
sched_switch(c4206b40,0,104,191,1c8e2774,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c4206b40,0,c0cad0c9,260,c4206b40,...) at sleepq_switch+0x15f
sleepq_wait(c4344100,0,c0ca96a7,c0c9e2e9,0,...) at sleepq_wait+0x63
msleep_spin(c4344100,c4344118,c0c9e2e9,0,c0ca723a,...) at msleep_spin+0x21d
taskqueue_thread_loop(c0dd81a0,c3f46d38,c0ca3ea7,343,c0e06e40,...) at taskqueue_thread_loop+0x94
fork_exit(c08d1150,c0dd81a0,c3f46d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f46d70, ebp = 0 ---

Tracing command kernel pid 0 tid 100020 td 0xc43596c0
sched_switch(c43596c0,0,104,191,328a8cc8,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c43596c0,0,c0cad0c9,260,0,...) at sleepq_switch+0x15f
sleepq_wait(c4344380,0,c0c9e2e9,0,0,...) at sleepq_wait+0x63
_sleep(c4344380,c4344398,0,c0c9e2e9,0,...) at _sleep+0x36b
taskqueue_thread_loop(c0e1b5c8,c3f37d38,c0ca3ea7,343,c0e06e40,...) at taskqueue_thread_loop+0xba
fork_exit(c08d1150,c0e1b5c8,c3f37d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f37d70, ebp = 0 ---

Tracing command kernel pid 0 tid 100018 td 0xc4359b40
sched_switch(c4359b40,0,104,191,1c7b7198,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c4359b40,0,c0cad0c9,260,0,...) at sleepq_switch+0x15f
sleepq_wait(c4344600,0,c0c9e2e9,0,0,...) at sleepq_wait+0x63
_sleep(c4344600,c4344618,0,c0c9e2e9,0,...) at _sleep+0x36b
taskqueue_thread_loop(c0e076d8,c3f31d38,c0ca3ea7,343,c0e06e40,...) at taskqueue_thread_loop+0xba
fork_exit(c08d1150,c0e076d8,c3f31d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f31d70, ebp = 0 ---

Tracing command kernel pid 0 tid 100016 td 0xc4181480
sched_switch(c4181480,0,104,191,5738d30a,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,0,...) at mi_switch+0x200
sleepq_switch(c4181480,0,c0cad0c9,260,0,...) at sleepq_switch+0x15f
sleepq_wait(c4164e00,0,c0c9e2e9,0,0,...) at sleepq_wait+0x63
_sleep(c4164e00,c4164e18,0,c0c9e2e9,0,...) at _sleep+0x36b
taskqueue_thread_loop(c0e1a060,c3f2bd38,c0ca3ea7,343,c0e06e40,...) at taskqueue_thread_loop+0xba
fork_exit(c08d1150,c0e1a060,c3f2bd38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3f2bd70, ebp = 0 ---

Tracing command kernel pid 0 tid 100000 td 0xc0e070f0
sched_switch(c0e070f0,0,104,191,49f80ec0,...) at sched_switch+0x406
mi_switch(104,0,c0cad0c9,1eb,44,...) at mi_switch+0x200
sleepq_switch(c0e070f0,0,c0cad0c9,283,0,...) at sleepq_switch+0x15f
sleepq_timedwait(c0e06e40,44,c0caaf23,0,0,...) at sleepq_timedwait+0x6b
_sleep(c0e06e40,0,44,c0caaf23,2710,...) at _sleep+0x339
scheduler(0,141ec00,141ec00,141e000,1425000,...)
    at scheduler+0x23e
mi_startup() at mi_startup+0x96
begin() at begin+0x2c
db:0:allt> call doadump
Physical memory: 1007 MB
Dumping 67 MB: 52 36 20 4
Dump complete
= 0xf
db:0:doadump> reset

$ svn diff -x -p /usr/src/sys
Index: /usr/src/sys/ufs/ufs/ufs_dirhash.c
===================================================================
--- /usr/src/sys/ufs/ufs/ufs_dirhash.c	(revision 202614)
+++ /usr/src/sys/ufs/ufs/ufs_dirhash.c	(working copy)
@@ -68,8 +68,6 @@ __FBSDID("$FreeBSD$");
 
 static MALLOC_DEFINE(M_DIRHASH, "ufs_dirhash", "UFS directory hash tables");
 
-static SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
-
 static int ufs_mindirhashsize = DIRBLKSIZ * 5;
 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW, &ufs_mindirhashsize,
Index: /usr/src/sys/ufs/ufs/inode.h
===================================================================
--- /usr/src/sys/ufs/ufs/inode.h	(revision 202614)
+++ /usr/src/sys/ufs/ufs/inode.h	(working copy)
@@ -120,7 +120,7 @@ struct inode {
 #define	IN_CHANGE	0x0002		/* Inode change time update request. */
 #define	IN_UPDATE	0x0004		/* Modification time update request. */
 #define	IN_MODIFIED	0x0008		/* Inode has been modified. */
-#define	IN_RENAME	0x0010		/* Inode is being renamed. */
+#define	IN_NEEDSYNC	0x0010		/* Inode requires fsync. */
 #define	IN_LAZYMOD	0x0040		/* Modified, but don't write yet. */
 #define	IN_SPACECOUNTED	0x0080		/* Blocks to be freed in free count. */
 #define	IN_LAZYACCESS	0x0100		/* Process IN_ACCESS after the
@@ -175,6 +175,7 @@ struct indir {
 /* Determine if soft dependencies are being done */
 #define	DOINGSOFTDEP(vp)	((vp)->v_mount->mnt_flag & MNT_SOFTDEP)
 #define	DOINGASYNC(vp)		((vp)->v_mount->mnt_kern_flag & MNTK_ASYNC)
+#define	DOINGSUJ(vp)		((vp)->v_mount->mnt_flag & MNT_SUJ)
 
 /* This overlays the fid structure (see mount.h). */
 struct ufid {
Index: /usr/src/sys/ufs/ufs/dinode.h
===================================================================
--- /usr/src/sys/ufs/ufs/dinode.h	(revision 202614)
+++ /usr/src/sys/ufs/ufs/dinode.h	(working copy)
@@ -146,7 +146,8 @@ struct ufs2_dinode {
 	ufs2_daddr_t	di_db[NDADDR];	/* 112: Direct disk blocks. */
 	ufs2_daddr_t	di_ib[NIADDR];	/* 208: Indirect disk blocks. */
 	u_int64_t	di_modrev;	/* 232: i_modrev for NFSv4 */
-	int64_t		di_spare[2];	/* 240: Reserved; currently unused */
+	ino_t		di_freelink;	/* 240: SUJ: Next unlinked inode. */
+	uint32_t	di_spare[3];	/* 244: Reserved; currently unused */
 };
 
 /*
@@ -167,9 +168,7 @@ struct ufs2_dinode {
 struct ufs1_dinode {
 	u_int16_t	di_mode;	/*   0: IFMT, permissions; see below. */
 	int16_t		di_nlink;	/*   2: File link count. */
-	union {
-		u_int16_t oldids[2];	/*   4: Ffs: old user and group ids. */
-	} di_u;
+	ino_t		di_freelink;	/*   4: SUJ: Next unlinked inode. */
 	u_int64_t	di_size;	/*   8: File byte count. */
 	int32_t		di_atime;	/*  16: Last access time. */
 	int32_t		di_atimensec;	/*  20: Last access time. */
@@ -186,7 +185,5 @@ struct ufs1_dinode {
 	u_int32_t	di_gid;		/* 116: File group. */
 	u_int64_t	di_modrev;	/* 120: i_modrev for NFSv4 */
 };
 
-#define	di_ogid		di_u.oldids[1]
-#define	di_ouid		di_u.oldids[0]
 
 #endif /* _UFS_UFS_DINODE_H_ */
Index: /usr/src/sys/ufs/ufs/ufs_vnops.c
===================================================================
--- /usr/src/sys/ufs/ufs/ufs_vnops.c	(revision 202614)
+++ /usr/src/sys/ufs/ufs/ufs_vnops.c	(working copy)
@@ -114,6 +114,8 @@ static vop_close_t	ufsfifo_close;
 static vop_kqfilter_t	ufsfifo_kqfilter;
 static vop_pathconf_t	ufsfifo_pathconf;
 
+SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
+
 /*
  * A virgin directory (no blushing please).
  */
@@ -974,6 +976,9 @@ ufs_link(ap)
 		error = EXDEV;
 		goto out;
 	}
+	if (VTOI(tdvp)->i_effnlink < 2)
+		panic("ufs_link: Bad link count %d on parent",
+		    VTOI(tdvp)->i_effnlink);
 	ip = VTOI(vp);
 	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
 		error = EMLINK;
@@ -988,11 +993,11 @@ ufs_link(ap)
 	DIP_SET(ip, i_nlink, ip->i_nlink);
 	ip->i_flag |= IN_CHANGE;
 	if (DOINGSOFTDEP(vp))
-		softdep_change_linkcnt(ip);
+		softdep_setup_link(VTOI(tdvp), ip);
 	error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
 	if (!error) {
 		ufs_makedirentry(ip, cnp, &newdir);
-		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
+		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0);
 	}
 
 	if (error) {
@@ -1001,7 +1006,7 @@ ufs_link(ap)
 		DIP_SET(ip, i_nlink, ip->i_nlink);
 		ip->i_flag |= IN_CHANGE;
 		if (DOINGSOFTDEP(vp))
-			softdep_change_linkcnt(ip);
+			softdep_revert_link(VTOI(tdvp), ip);
 	}
 out:
 	return (error);
@@ -1043,7 +1048,7 @@ ufs_whiteout(ap)
 		newdir.d_namlen = cnp->cn_namelen;
 		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
 		newdir.d_type = DT_WHT;
-		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
+		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0);
 		break;
 
 	case DELETE:
@@ -1062,6 +1067,11 @@ ufs_whiteout(ap)
 	return (error);
 }
 
+static volatile int rename_restarts;
+SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD,
+    __DEVOLATILE(int *, &rename_restarts), 0,
+    "Times rename had to restart due to lock contention");
+
 /*
  * Rename system call.
  * 	rename("foo", "bar");
@@ -1101,111 +1111,183 @@ ufs_rename(ap)
 	struct vnode *tdvp = ap->a_tdvp;
 	struct vnode *fvp = ap->a_fvp;
 	struct vnode *fdvp = ap->a_fdvp;
+	struct vnode *nvp;
 	struct componentname *tcnp = ap->a_tcnp;
 	struct componentname *fcnp = ap->a_fcnp;
 	struct thread *td = fcnp->cn_thread;
-	struct inode *ip, *xp, *dp;
+	struct inode *fip, *tip, *tdp, *fdp;
 	struct direct newdir;
-	int doingdirectory = 0, oldparent = 0, newparent = 0;
+	off_t endoff;
+	int doingdirectory, newparent;
 	int error = 0, ioflag;
-	ino_t fvp_ino;
+	struct mount *mp;
+	ino_t ino;
 
 #ifdef INVARIANTS
 	if ((tcnp->cn_flags & HASBUF) == 0 ||
 	    (fcnp->cn_flags & HASBUF) == 0)
 		panic("ufs_rename: no name");
 #endif
+	endoff = 0;
+	mp = tdvp->v_mount;
+	VOP_UNLOCK(tdvp, 0);
+	if (tvp && tvp != tdvp)
+		VOP_UNLOCK(tvp, 0);
 	/*
 	 * Check for cross-device rename.
 	 */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
-abortit:
-		if (tdvp == tvp)
-			vrele(tdvp);
-		else
-			vput(tdvp);
-		if (tvp)
-			vput(tvp);
-		vrele(fdvp);
+		mp = NULL;
+		goto releout;
+	}
+	error = vfs_busy(mp, 0);
+	if (error) {
+		mp = NULL;
+		goto releout;
+	}
+relock:
+	/*
+	 * We need to acquire 2 to 4 locks depending on whether tvp is NULL
+	 * and fdvp and tdvp are the same directory.  Subsequently we need
+	 * to double-check all paths and in the directory rename case we
+	 * need to verify that we are not creating a directory loop.  To
+	 * handle this we acquire all but fdvp using non-blocking
+	 * acquisitions.  If we fail to acquire any lock in the path we will
+	 * drop all held locks, acquire the new lock in a blocking fashion,
+	 * and then release it and restart the rename.  This acquire/release
+	 * step ensures that we do not spin on a lock waiting for release.
+	 */
+	error = vn_lock(fdvp, LK_EXCLUSIVE);
+	if (error)
+		goto releout;
+	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
+		VOP_UNLOCK(fdvp, 0);
+		error = vn_lock(tdvp, LK_EXCLUSIVE);
+		if (error)
+			goto releout;
+		VOP_UNLOCK(tdvp, 0);
+		atomic_add_int(&rename_restarts, 1);
+		goto relock;
+	}
+	/*
+	 * Re-resolve fvp to be certain it still exists and fetch the
+	 * correct vnode.
	 */
+	error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
+	if (error) {
+		VOP_UNLOCK(fdvp, 0);
+		VOP_UNLOCK(tdvp, 0);
+		goto releout;
+	}
+	error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
+	if (error) {
+		VOP_UNLOCK(fdvp, 0);
+		VOP_UNLOCK(tdvp, 0);
+		if (error != EBUSY)
+			goto releout;
+		error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
+		if (error != 0)
+			goto releout;
+		VOP_UNLOCK(nvp, 0);
 		vrele(fvp);
-		return (error);
+		fvp = nvp;
+		atomic_add_int(&rename_restarts, 1);
+		goto relock;
 	}
-
+	vrele(fvp);
+	fvp = nvp;
+	/*
+	 * Re-resolve tvp and acquire the vnode lock if present.
+	 */
+	error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino);
+	if (error != 0 && error != EJUSTRETURN) {
+		VOP_UNLOCK(fdvp, 0);
+		VOP_UNLOCK(tdvp, 0);
+		VOP_UNLOCK(fvp, 0);
+		goto releout;
+	}
+	/*
+	 * If tvp disappeared we just carry on.
+	 */
+	if (error == EJUSTRETURN && tvp != NULL) {
+		vrele(tvp);
+		tvp = NULL;
+	}
+	/*
+	 * Get the tvp ino if the lookup succeeded.  We may have to restart
+	 * if the non-blocking acquire fails.
+	 */
+	if (error == 0) {
+		nvp = NULL;
+		error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
+		if (tvp)
+			vrele(tvp);
+		tvp = nvp;
+		if (error) {
+			VOP_UNLOCK(fdvp, 0);
+			VOP_UNLOCK(tdvp, 0);
+			VOP_UNLOCK(fvp, 0);
+			if (error != EBUSY)
+				goto releout;
+			error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
+			if (error != 0)
+				goto releout;
+			VOP_UNLOCK(nvp, 0);
+			atomic_add_int(&rename_restarts, 1);
+			goto relock;
+		}
+	}
+	fdp = VTOI(fdvp);
+	fip = VTOI(fvp);
+	tdp = VTOI(tdvp);
+	tip = NULL;
+	if (tvp)
+		tip = VTOI(tvp);
 	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 	    (VTOI(tdvp)->i_flags & APPEND))) {
 		error = EPERM;
-		goto abortit;
+		goto unlockout;
 	}
-
 	/*
 	 * Renaming a file to itself has no effect.  The upper layers should
-	 * not call us in that case.  Temporarily just warn if they do.
+	 * not call us in that case.  However, things could change after
+	 * we drop the locks above.
 	 */
 	if (fvp == tvp) {
-		printf("ufs_rename: fvp == tvp (can't happen)\n");
 		error = 0;
-		goto abortit;
+		goto unlockout;
 	}
-
-	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
-		goto abortit;
-	dp = VTOI(fdvp);
-	ip = VTOI(fvp);
-	if (ip->i_nlink >= LINK_MAX) {
-		VOP_UNLOCK(fvp, 0);
+	doingdirectory = 0;
+	newparent = 0;
+	ino = fip->i_number;
+	if (fip->i_nlink >= LINK_MAX) {
 		error = EMLINK;
-		goto abortit;
+		goto unlockout;
 	}
-	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
-	    || (dp->i_flags & APPEND)) {
-		VOP_UNLOCK(fvp, 0);
+	if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
+	    || (fdp->i_flags & APPEND)) {
 		error = EPERM;
-		goto abortit;
+		goto unlockout;
 	}
-	if ((ip->i_mode & IFMT) == IFDIR) {
+	if ((fip->i_mode & IFMT) == IFDIR) {
 		/*
 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
 		 */
 		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
-		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
-		    (ip->i_flag & IN_RENAME)) {
-			VOP_UNLOCK(fvp, 0);
+		    fdp == fip ||
+		    (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
 			error = EINVAL;
-			goto abortit;
+			goto unlockout;
 		}
-		ip->i_flag |= IN_RENAME;
-		oldparent = dp->i_number;
+		if (fdp->i_number != tdp->i_number)
+			newparent = tdp->i_number;
 		doingdirectory = 1;
 	}
-	vrele(fdvp);
-
-	/*
-	 * When the target exists, both the directory
-	 * and target vnodes are returned locked.
-	 */
-	dp = VTOI(tdvp);
-	xp = NULL;
-	if (tvp)
-		xp = VTOI(tvp);
-
-	/*
-	 * 1) Bump link count while we're moving stuff
-	 *    around.  If we crash somewhere before
-	 *    completing our work, the link count
-	 *    may be wrong, but correctable.
-	 */
-	ip->i_effnlink++;
-	ip->i_nlink++;
-	DIP_SET(ip, i_nlink, ip->i_nlink);
-	ip->i_flag |= IN_CHANGE;
-	if (DOINGSOFTDEP(fvp))
-		softdep_change_linkcnt(ip);
-	if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
-	    DOINGASYNC(fvp)))) != 0) {
-		VOP_UNLOCK(fvp, 0);
-		goto bad;
+	if (fvp->v_mountedhere != NULL || (tvp && tvp->v_mountedhere != NULL)) {
+		error = EXDEV;
+		goto unlockout;
 	}
 
	/*
@@ -1214,88 +1296,93 @@ ufs_rename(ap)
 	 * directory hierarchy above the target, as this would
 	 * orphan everything below the source directory. Also
 	 * the user must have write permission in the source so
-	 * as to be able to change "..". We must repeat the call
-	 * to namei, as the parent directory is unlocked by the
-	 * call to checkpath().
+	 * as to be able to change "..".
 	 */
-	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
-	fvp_ino = ip->i_number;
-	VOP_UNLOCK(fvp, 0);
-	if (oldparent != dp->i_number)
-		newparent = dp->i_number;
 	if (doingdirectory && newparent) {
-		if (error)	/* write access check above */
-			goto bad;
-		if (xp != NULL)
-			vput(tvp);
-		error = ufs_checkpath(fvp_ino, dp, tcnp->cn_cred);
+		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
 		if (error)
-			goto out;
+			goto unlockout;
+		error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
+		    &ino);
+		/*
+		 * We encountered a lock that we have to wait for.  Unlock
+		 * everything else and VGET before restarting.
+		 */
+		if (ino) {
+			VOP_UNLOCK(fdvp, 0);
+			VOP_UNLOCK(fvp, 0);
+			VOP_UNLOCK(tdvp, 0);
+			if (tvp)
+				VOP_UNLOCK(tvp, 0);
+			error = VFS_VGET(mp, ino, LK_SHARED, &nvp);
+			if (error == 0)
+				vput(nvp);
+			atomic_add_int(&rename_restarts, 1);
+			goto relock;
+		}
+		if (error)
+			goto unlockout;
 		if ((tcnp->cn_flags & SAVESTART) == 0)
 			panic("ufs_rename: lost to startdir");
-		VREF(tdvp);
-		error = relookup(tdvp, &tvp, tcnp);
-		if (error)
-			goto out;
-		vrele(tdvp);
-		dp = VTOI(tdvp);
-		xp = NULL;
-		if (tvp)
-			xp = VTOI(tvp);
 	}
+	if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 ||
+	    tdp->i_effnlink == 0)
+		panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
+	/*
+	 * 1) Bump link count while we're moving stuff
+	 *    around.  If we crash somewhere before
+	 *    completing our work, the link count
+	 *    may be wrong, but correctable.
+	 */
+	fip->i_effnlink++;
+	fip->i_nlink++;
+	DIP_SET(fip, i_nlink, fip->i_nlink);
+	fip->i_flag |= IN_CHANGE;
+	if (DOINGSOFTDEP(fvp))
+		softdep_setup_link(tdp, fip);
+	error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp)));
+	if (error)
+		goto bad;
+
 	/*
 	 * 2) If target doesn't exist, link the target
 	 *    to the source and unlink the source.
 	 *    Otherwise, rewrite the target directory
 	 *    entry to reference the source inode and
 	 *    expunge the original entry's existence.
 	 */
-	if (xp == NULL) {
-		if (dp->i_dev != ip->i_dev)
+	if (tip == NULL) {
+		if (tdp->i_dev != fip->i_dev)
 			panic("ufs_rename: EXDEV");
-		/*
-		 * Account for ".." in new directory.
-		 * When source and destination have the same
-		 * parent we don't fool with the link count.
-		 */
 		if (doingdirectory && newparent) {
-			if ((nlink_t)dp->i_nlink >= LINK_MAX) {
+			/*
+			 * Account for ".." in new directory.
			 * When source and destination have the same
+			 * parent we don't adjust the link count.  The
+			 * actual link modification is completed when
+			 * .. is rewritten below.
+			 */
+			if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
 				error = EMLINK;
 				goto bad;
 			}
-			dp->i_effnlink++;
-			dp->i_nlink++;
-			DIP_SET(dp, i_nlink, dp->i_nlink);
-			dp->i_flag |= IN_CHANGE;
-			if (DOINGSOFTDEP(tdvp))
-				softdep_change_linkcnt(dp);
-			error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
-			    DOINGASYNC(tdvp)));
-			if (error)
-				goto bad;
 		}
-		ufs_makedirentry(ip, tcnp, &newdir);
-		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
-		if (error) {
-			if (doingdirectory && newparent) {
-				dp->i_effnlink--;
-				dp->i_nlink--;
-				DIP_SET(dp, i_nlink, dp->i_nlink);
-				dp->i_flag |= IN_CHANGE;
-				if (DOINGSOFTDEP(tdvp))
-					softdep_change_linkcnt(dp);
-				(void)UFS_UPDATE(tdvp, 1);
-			}
+		ufs_makedirentry(fip, tcnp, &newdir);
+		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1);
+		if (error)
 			goto bad;
-		}
-		vput(tdvp);
+		/* Setup tdvp for directory compaction if needed. */
+		if (tdp->i_count && tdp->i_endoff &&
+		    tdp->i_endoff < tdp->i_size)
+			endoff = tdp->i_endoff;
 	} else {
-		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
+		if (tip->i_dev != tdp->i_dev || tip->i_dev != fip->i_dev)
 			panic("ufs_rename: EXDEV");
 		/*
 		 * Short circuit rename(foo, foo).
 		 */
-		if (xp->i_number == ip->i_number)
+		if (tip->i_number == fip->i_number)
 			panic("ufs_rename: same file");
 		/*
 		 * If the parent directory is "sticky", then the caller
@@ -1303,7 +1390,7 @@ ufs_rename(ap)
 		 * destination of the rename.  This implements append-only
 		 * directories.
 		 */
-		if ((dp->i_mode & S_ISTXT) &&
+		if ((tdp->i_mode & S_ISTXT) &&
 		    VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
 		    VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
 			error = EPERM;
@@ -1314,9 +1401,9 @@ ufs_rename(ap)
 		 * to it. Also, ensure source and target are compatible
 		 * (both directories, or both not directories).
 		 */
-		if ((xp->i_mode&IFMT) == IFDIR) {
-			if ((xp->i_effnlink > 2) ||
-			    !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
+		if ((tip->i_mode & IFMT) == IFDIR) {
+			if ((tip->i_effnlink > 2) ||
+			    !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) {
 				error = ENOTEMPTY;
 				goto bad;
 			}
@@ -1329,21 +1416,31 @@ ufs_rename(ap)
 			error = EISDIR;
 			goto bad;
 		}
-		error = ufs_dirrewrite(dp, xp, ip->i_number,
-		    IFTODT(ip->i_mode),
-		    (doingdirectory && newparent) ? newparent : doingdirectory);
-		if (error)
-			goto bad;
 		if (doingdirectory) {
 			if (!newparent) {
-				dp->i_effnlink--;
+				tdp->i_effnlink--;
 				if (DOINGSOFTDEP(tdvp))
-					softdep_change_linkcnt(dp);
+					softdep_change_linkcnt(tdp);
 			}
-			xp->i_effnlink--;
+			tip->i_effnlink--;
 			if (DOINGSOFTDEP(tvp))
-				softdep_change_linkcnt(xp);
+				softdep_change_linkcnt(tip);
 		}
+		error = ufs_dirrewrite(tdp, tip, fip->i_number,
+		    IFTODT(fip->i_mode),
+		    (doingdirectory && newparent) ? newparent : doingdirectory);
+		if (error) {
+			if (doingdirectory) {
+				if (!newparent) {
+					tdp->i_effnlink++;
+					if (DOINGSOFTDEP(tdvp))
+						softdep_change_linkcnt(tdp);
+				}
+				tip->i_effnlink++;
+				if (DOINGSOFTDEP(tvp))
+					softdep_change_linkcnt(tip);
+			}
+		}
 		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
 			/*
 			 * Truncate inode.
The only stuff left in the directory @@ -1357,115 +1454,107 @@ ufs_rename(ap) * them now. */ if (!newparent) { - dp->i_nlink--; - DIP_SET(dp, i_nlink, dp->i_nlink); - dp->i_flag |= IN_CHANGE; + tdp->i_nlink--; + DIP_SET(tdp, i_nlink, tdp->i_nlink); + tdp->i_flag |= IN_CHANGE; } - xp->i_nlink--; - DIP_SET(xp, i_nlink, xp->i_nlink); - xp->i_flag |= IN_CHANGE; + tip->i_nlink--; + DIP_SET(tip, i_nlink, tip->i_nlink); + tip->i_flag |= IN_CHANGE; ioflag = IO_NORMAL; if (!DOINGASYNC(tvp)) ioflag |= IO_SYNC; + /* Don't go to bad here as the new link exists. */ if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag, tcnp->cn_cred, tcnp->cn_thread)) != 0) - goto bad; + goto unlockout; } - vput(tdvp); - vput(tvp); - xp = NULL; } /* - * 3) Unlink the source. + * 3) Unlink the source. We have to resolve the path again to + * fixup the directory offset and count for ufs_dirremove. */ - fcnp->cn_flags &= ~MODMASK; - fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; - if ((fcnp->cn_flags & SAVESTART) == 0) - panic("ufs_rename: lost from startdir"); - VREF(fdvp); - error = relookup(fdvp, &fvp, fcnp); - if (error == 0) - vrele(fdvp); - if (fvp != NULL) { - xp = VTOI(fvp); - dp = VTOI(fdvp); - } else { - /* - * From name has disappeared. IN_RENAME is not sufficient - * to protect against directory races due to timing windows, - * so we have to remove the panic. XXX the only real way - * to solve this issue is at a much higher level. By the - * time we hit ufs_rename() it's too late. - */ -#if 0 - if (doingdirectory) - panic("ufs_rename: lost dir entry"); -#endif - vrele(ap->a_fvp); - return (0); + if (fdvp == tdvp) { + error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); + if (error) + panic("ufs_rename: from entry went away!"); + if (ino != fip->i_number) + panic("ufs_rename: ino mismatch %d != %d\n", ino, + fip->i_number); } /* - * Ensure that the directory entry still exists and has not - * changed while the new name has been entered. If the source is - * a file then the entry may have been unlinked or renamed. In - * either case there is no further work to be done. If the source - * is a directory then it cannot have been rmdir'ed; the IN_RENAME - * flag ensures that it cannot be moved by another rename or removed - * by a rmdir. + * If the source is a directory with a + * new parent, the link count of the old + * parent directory must be decremented + * and ".." set to point to the new parent. */ - if (xp != ip) { + if (doingdirectory && newparent) { /* - * From name resolves to a different inode. IN_RENAME is - * not sufficient protection against timing window races - * so we can't panic here. XXX the only real way - * to solve this issue is at a much higher level. By the - * time we hit ufs_rename() it's too late. + * If tip exists we simply use its link, otherwise we must + * add a new one. */ -#if 0 - if (doingdirectory) - panic("ufs_rename: lost dir entry"); -#endif - } else { - /* - * If the source is a directory with a - * new parent, the link count of the old - * parent directory must be decremented - * and ".." set to point to the new parent. - */ - if (doingdirectory && newparent) { - xp->i_offset = mastertemplate.dot_reclen; - ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); - cache_purge(fdvp); + if (tip == NULL) { + tdp->i_effnlink++; + tdp->i_nlink++; + DIP_SET(tdp, i_nlink, tdp->i_nlink); + tdp->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(tdvp)) + softdep_setup_dotdot_link(tdp, fip); + error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) | + DOINGASYNC(tdvp))); + /* Don't go to bad here as the new link exists. 
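* (The new name entered in step 2 already references fip, so taking the
* "bad:" path here would revert the step 1 link count bump and leave
* i_nlink below the number of directory entries pointing at the inode.)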
*/ + if (error) + goto unlockout; } - error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); - xp->i_flag &= ~IN_RENAME; + fip->i_offset = mastertemplate.dot_reclen; + ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0); + cache_purge(fdvp); } - if (dp) - vput(fdvp); - if (xp) - vput(fvp); - vrele(ap->a_fvp); + error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0); + +unlockout: + vput(fdvp); + vput(fvp); + if (tvp) + vput(tvp); + /* + * If compaction or fsync was requested do it now that other locks + * are no longer needed. + */ + if (error == 0 && endoff != 0) { +#ifdef UFS_DIRHASH + if (tdp->i_dirhash != NULL) + ufsdirhash_dirtrunc(tdp, endoff); +#endif + UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred, + td); + } + if (error == 0 && tdp->i_flag & IN_NEEDSYNC) + error = VOP_FSYNC(tdvp, MNT_WAIT, td); + vput(tdvp); + if (mp) + vfs_unbusy(mp); return (error); bad: - if (xp) - vput(ITOV(xp)); - vput(ITOV(dp)); -out: - if (doingdirectory) - ip->i_flag &= ~IN_RENAME; - if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { - ip->i_effnlink--; - ip->i_nlink--; - DIP_SET(ip, i_nlink, ip->i_nlink); - ip->i_flag |= IN_CHANGE; - ip->i_flag &= ~IN_RENAME; - if (DOINGSOFTDEP(fvp)) - softdep_change_linkcnt(ip); - vput(fvp); - } else - vrele(fvp); + fip->i_effnlink--; + fip->i_nlink--; + DIP_SET(fip, i_nlink, fip->i_nlink); + fip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(fvp)) + softdep_revert_link(tdp, fip); + goto unlockout; + +releout: + vrele(fdvp); + vrele(fvp); + vrele(tdvp); + if (tvp) + vrele(tvp); + if (mp) + vfs_unbusy(mp); + return (error); } @@ -1664,8 +1753,7 @@ ufs_mkdir(ap) ip->i_effnlink = 2; ip->i_nlink = 2; DIP_SET(ip, i_nlink, 2); - if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); @@ -1681,8 +1769,8 @@ ufs_mkdir(ap) DIP_SET(dp, i_nlink, dp->i_nlink); dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(dvp)) - softdep_change_linkcnt(dp); - error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); + softdep_setup_mkdir(dp, ip); + error = UFS_UPDATE(dvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); if (error) goto bad; #ifdef MAC @@ -1791,7 +1879,7 @@ ufs_mkdir(ap) else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) goto bad; ufs_makedirentry(ip, cnp, &newdir); - error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); + error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0); bad: if (error == 0) { @@ -1807,8 +1895,6 @@ bad: dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); dp->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(dvp)) - softdep_change_linkcnt(dp); /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set the link count to 0. @@ -1818,7 +1904,8 @@ bad: DIP_SET(ip, i_nlink, 0); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + softdep_revert_mkdir(dp, ip); + vput(tvp); } out: @@ -1854,10 +1941,13 @@ ufs_rmdir(ap) * tries to remove a locally mounted on directory). 
*/ error = 0; - if ((ip->i_flag & IN_RENAME) || ip->i_effnlink < 2) { + if (ip->i_effnlink < 2) { error = EINVAL; goto out; } + if (dp->i_effnlink < 3) + panic("ufs_dirrem: Bad link count %d on parent", + dp->i_effnlink); if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; @@ -1881,18 +1971,14 @@ ufs_rmdir(ap) */ dp->i_effnlink--; ip->i_effnlink--; - if (DOINGSOFTDEP(vp)) { - softdep_change_linkcnt(dp); - softdep_change_linkcnt(ip); - } + if (DOINGSOFTDEP(vp)) + softdep_setup_rmdir(dp, ip); error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) { dp->i_effnlink++; ip->i_effnlink++; - if (DOINGSOFTDEP(vp)) { - softdep_change_linkcnt(dp); - softdep_change_linkcnt(ip); - } + if (DOINGSOFTDEP(vp)) + softdep_revert_rmdir(dp, ip); goto out; } cache_purge(dvp); @@ -2401,6 +2487,9 @@ ufs_makeinode(mode, dvp, vpp, cnp) if ((mode & IFMT) == 0) mode |= IFREG; + if (VTOI(dvp)->i_effnlink < 2) + panic("ufs_makeinode: Bad link count %d on parent", + VTOI(dvp)->i_effnlink); error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) return (error); @@ -2530,7 +2619,7 @@ ufs_makeinode(mode, dvp, vpp, cnp) ip->i_nlink = 1; DIP_SET(ip, i_nlink, 1); if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + softdep_setup_create(VTOI(dvp), ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) { ip->i_mode &= ~ISGID; @@ -2594,7 +2683,7 @@ ufs_makeinode(mode, dvp, vpp, cnp) } #endif /* !UFS_ACL */ ufs_makedirentry(ip, cnp, &newdir); - error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL); + error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0); if (error) goto bad; *vpp = tvp; @@ -2610,7 +2699,7 @@ bad: DIP_SET(ip, i_nlink, 0); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + softdep_revert_create(VTOI(dvp), ip); vput(tvp); return (error); } Index: /usr/src/sys/ufs/ufs/ufsmount.h =================================================================== --- /usr/src/sys/ufs/ufs/ufsmount.h (revision 202614) +++ /usr/src/sys/ufs/ufs/ufsmount.h (working copy) @@ -57,7 +57,11 @@ struct ucred; struct uio; struct vnode; struct ufs_extattr_per_mount; +struct jblocks; +struct inodedep; +TAILQ_HEAD(inodedeplst, inodedep); + /* This structure describes the UFS specific mount structure data. 
*/ struct ufsmount { struct mount *um_mountp; /* filesystem vfs structure */ @@ -75,6 +79,11 @@ struct ufsmount { long um_numindirdeps; /* outstanding indirdeps */ struct workhead softdep_workitem_pending; /* softdep work queue */ struct worklist *softdep_worklist_tail; /* Tail pointer for above */ + struct workhead softdep_journal_pending; /* journal work queue */ + struct worklist *softdep_journal_tail; /* Tail pointer for above */ + struct jblocks *softdep_jblocks; /* Journal block information */ + struct inodedeplst softdep_unlinked; /* Unlinked inodes */ + int softdep_on_journal; /* Items on the journal list */ int softdep_on_worklist; /* Items on the worklist */ int softdep_on_worklist_inprogress; /* Busy items on worklist */ int softdep_deps; /* Total dependency count */ Index: /usr/src/sys/ufs/ufs/ufs_lookup.c =================================================================== --- /usr/src/sys/ufs/ufs/ufs_lookup.c (revision 202614) +++ /usr/src/sys/ufs/ufs/ufs_lookup.c (working copy) @@ -77,9 +77,6 @@ SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, /* true if old FS format...*/ #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) -static int ufs_lookup_(struct vnode *, struct vnode **, struct componentname *, - ino_t *); - static int ufs_delete_denied(struct vnode *vdp, struct vnode *tdp, struct ucred *cred, struct thread *td) @@ -189,11 +186,11 @@ ufs_lookup(ap) } */ *ap; { - return (ufs_lookup_(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL)); + return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL)); } -static int -ufs_lookup_(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp, +int +ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp, ino_t *dd_ino) { struct inode *dp; /* inode for directory being searched */ @@ -524,6 +521,8 @@ notfound: return (ENOENT); found: + if (dd_ino != NULL) + *dd_ino = ino; if (numdirpasses == 2) nchstats.ncs_pass2++; /* @@ -546,11 +545,6 @@ found: if ((flags & ISLASTCN) && nameiop == LOOKUP) dp->i_diroff = i_offset &~ (DIRBLKSIZ - 1); - if (dd_ino != NULL) { - *dd_ino = ino; - return (0); - } - /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. @@ -558,17 +552,6 @@ found: if (nameiop == DELETE && (flags & ISLASTCN)) { if (flags & LOCKPARENT) ASSERT_VOP_ELOCKED(vdp, __FUNCTION__); - if ((error = VFS_VGET(vdp->v_mount, ino, - LK_EXCLUSIVE, &tdp)) != 0) - return (error); - - error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); - if (error) { - vput(tdp); - return (error); - } - - /* * Return pointer to current entry in dp->i_offset, * and distance past previous entry (if there @@ -585,6 +568,16 @@ found: dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; + if (dd_ino != NULL) + return (0); + if ((error = VFS_VGET(vdp->v_mount, ino, + LK_EXCLUSIVE, &tdp)) != 0) + return (error); + error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); + if (error) { + vput(tdp); + return (error); + } if (dp->i_number == ino) { VREF(vdp); *vpp = vdp; @@ -616,6 +609,8 @@ found: dp->i_offset = i_offset; if (dp->i_number == ino) return (EISDIR); + if (dd_ino != NULL) + return (0); if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE, &tdp)) != 0) return (error); @@ -650,6 +645,8 @@ found: cnp->cn_flags |= SAVENAME; return (0); } + if (dd_ino != NULL) + return (0); /* * Step through the translation in the name. We do not `vput' the @@ -681,7 +678,7 @@ found: * to the inode we looked up before vdp lock was * dropped. 
*/ - error = ufs_lookup_(pdp, NULL, cnp, &ino1); + error = ufs_lookup_ino(pdp, NULL, cnp, &ino1); if (error) { vput(tdp); return (error); @@ -825,12 +822,13 @@ ufs_makedirentry(ip, cnp, newdirp) * soft dependency code). */ int -ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) +ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename) struct vnode *dvp; struct vnode *tvp; struct direct *dirp; struct componentname *cnp; struct buf *newdirbp; + int isrename; { struct ucred *cr; struct thread *td; @@ -903,22 +901,28 @@ int blkoff += DIRBLKSIZ; } if (softdep_setup_directory_add(bp, dp, dp->i_offset, - dirp->d_ino, newdirbp, 1) == 0) { - bdwrite(bp); + dirp->d_ino, newdirbp, 1)) + dp->i_flag |= IN_NEEDSYNC; + if (newdirbp) + bdwrite(newdirbp); + bdwrite(bp); + if ((dp->i_flag & IN_NEEDSYNC) == 0) return (UFS_UPDATE(dvp, 0)); - } - /* We have just allocated a directory block in an - * indirect block. Rather than tracking when it gets - * claimed by the inode, we simply do a VOP_FSYNC - * now to ensure that it is there (in case the user - * does a future fsync). Note that we have to unlock - * the inode for the entry that we just entered, as - * the VOP_FSYNC may need to lock other inodes which - * can lead to deadlock if we also hold a lock on - * the newly entered node. + /* + * We have just allocated a directory block in an + * indirect block. We must prevent holes in the + * directory created if directory entries are + * written out of order. To accomplish this we + * fsync when we extend a directory into indirects. + * During rename it's not safe to drop the tvp lock + * so sync must be delayed until it is. + * + * This synchronous step could be removed if fsck and + * the kernel were taught to fill in sparse + * directories rather than panic. */ - if ((error = bwrite(bp))) - return (error); + if (isrename) + return (0); if (tvp != NULL) VOP_UNLOCK(tvp, 0); error = VOP_FSYNC(dvp, MNT_WAIT, td); @@ -1007,7 +1011,7 @@ int dp->i_offset + ((char *)ep - dirbuf)); #endif if (DOINGSOFTDEP(dvp)) - softdep_change_directoryentry_offset(dp, dirbuf, + softdep_change_directoryentry_offset(bp, dp, dirbuf, (caddr_t)nep, (caddr_t)ep, dsize); else bcopy((caddr_t)nep, (caddr_t)ep, dsize); @@ -1059,6 +1063,8 @@ int (void) softdep_setup_directory_add(bp, dp, dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp, 0); + if (newdirbp != NULL) + bdwrite(newdirbp); bdwrite(bp); } else { if (DOINGASYNC(dvp)) { @@ -1076,7 +1082,8 @@ int * lock other inodes which can lead to deadlock if we also hold a * lock on the newly entered node. */ - if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { + if (isrename == 0 && error == 0 && + dp->i_endoff && dp->i_endoff < dp->i_size) { if (tvp != NULL) VOP_UNLOCK(tvp, 0); #ifdef UFS_DIRHASH @@ -1117,6 +1124,19 @@ ufs_dirremove(dvp, ip, flags, isrmdir) dp = VTOI(dvp); + /* + * Adjust the link count early so softdep can block if necessary. + */ + if (ip) { + ip->i_effnlink--; + if (DOINGSOFTDEP(dvp)) { + softdep_setup_unlink(dp, ip); + } else { + ip->i_nlink--; + DIP_SET(ip, i_nlink, ip->i_nlink); + ip->i_flag |= IN_CHANGE; + } + } if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to WINO. @@ -1146,6 +1166,9 @@ ufs_dirremove(dvp, ip, flags, isrmdir) if (dp->i_dirhash != NULL) ufsdirhash_remove(dp, rep, dp->i_offset); #endif + if (ip && rep->d_ino != ip->i_number) + panic("ufs_dirremove: ip %d does not match dirent ino %d\n", + ip->i_number, rep->d_ino); if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. 
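[Annotation: the ufs_dirremove() hunk above moves the i_effnlink decrement ahead of the buffer manipulation so softdep_setup_unlink() can sleep for journal space before any buffer lock is held; the following hunk gives ufs_dirrewrite() the same treatment. A compressed sketch of the two-counter scheme these hunks rely on (my reading of the convention, not patch code): i_effnlink changes immediately and is what the namespace sees, while i_nlink follows only when the change is safe to commit.

	#include <assert.h>

	struct linkcnt {
		int nlink;	/* on-disk value, committed lazily */
		int effnlink;	/* effective value, updated at once */
	};

	/* Remove one name; "journaled" selects the deferred path. */
	static void
	unlink_name(struct linkcnt *lc, int journaled)
	{
		assert(lc->effnlink > 0);
		lc->effnlink--;		/* namespace forgets the name now */
		if (!journaled)
			lc->nlink--;	/* else deferred until the journal
					   dependency clears */
	}

	/* Called when the softdep machinery retires the dependency. */
	static void
	unlink_commit(struct linkcnt *lc)
	{
		lc->nlink--;
		assert(lc->nlink >= lc->effnlink);
	}

End of annotation.]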
@@ -1164,31 +1187,20 @@ ufs_dirremove(dvp, ip, flags, isrmdir) dp->i_offset & ~(DIRBLKSIZ - 1)); #endif out: + error = 0; if (DOINGSOFTDEP(dvp)) { - if (ip) { - ip->i_effnlink--; - softdep_change_linkcnt(ip); + if (ip) softdep_setup_remove(bp, dp, ip, isrmdir); - } - if (softdep_slowdown(dvp)) { + if (softdep_slowdown(dvp)) error = bwrite(bp); - } else { + else bdwrite(bp); - error = 0; - } } else { - if (ip) { - ip->i_effnlink--; - ip->i_nlink--; - DIP_SET(ip, i_nlink, ip->i_nlink); - ip->i_flag |= IN_CHANGE; - } if (flags & DOWHITEOUT) error = bwrite(bp); - else if (DOINGASYNC(dvp) && dp->i_count != 0) { + else if (DOINGASYNC(dvp) && dp->i_count != 0) bdwrite(bp); - error = 0; - } else + else error = bwrite(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; @@ -1221,6 +1233,19 @@ ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) struct vnode *vdp = ITOV(dp); int error; + /* + * Drop the link before we lock the buf so softdep can block if + * necessary. + */ + oip->i_effnlink--; + if (DOINGSOFTDEP(vdp)) { + softdep_setup_unlink(dp, oip); + } else { + oip->i_nlink--; + DIP_SET(oip, i_nlink, oip->i_nlink); + oip->i_flag |= IN_CHANGE; + } + error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); @@ -1232,15 +1257,10 @@ ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) ep->d_ino = newinum; if (!OFSFMT(vdp)) ep->d_type = newtype; - oip->i_effnlink--; if (DOINGSOFTDEP(vdp)) { - softdep_change_linkcnt(oip); softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); bdwrite(bp); } else { - oip->i_nlink--; - DIP_SET(oip, i_nlink, oip->i_nlink); - oip->i_flag |= IN_CHANGE; if (DOINGASYNC(vdp)) { bdwrite(bp); error = 0; @@ -1355,25 +1375,25 @@ ufs_dir_dd_ino(struct vnode *vp, struct ucred *cre /* * Check if source directory is in the path of the target directory. - * Target is supplied locked, source is unlocked. - * The target is always vput before returning. */ int -ufs_checkpath(ino_t source_ino, struct inode *target, struct ucred *cred) +ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct ucred *cred, ino_t *wait_ino) { - struct vnode *vp, *vp1; + struct mount *mp; + struct vnode *tvp, *vp, *vp1; int error; ino_t dd_ino; - vp = ITOV(target); - if (target->i_number == source_ino) { - error = EEXIST; - goto out; - } + vp = tvp = ITOV(target); + mp = vp->v_mount; + *wait_ino = 0; + if (target->i_number == source_ino) + return (EEXIST); + if (target->i_number == parent_ino) + return (0); + if (target->i_number == ROOTINO) + return (0); error = 0; - if (target->i_number == ROOTINO) - goto out; - for (;;) { error = ufs_dir_dd_ino(vp, cred, &dd_ino); if (error != 0) @@ -1384,9 +1404,13 @@ int } if (dd_ino == ROOTINO) break; - error = vn_vget_ino(vp, dd_ino, LK_EXCLUSIVE, &vp1); - if (error != 0) + if (dd_ino == parent_ino) break; + error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT, &vp1); + if (error != 0) { + *wait_ino = dd_ino; + break; + } /* Recheck that ".." still points to vp1 after relock of vp */ error = ufs_dir_dd_ino(vp, cred, &dd_ino); if (error != 0) { @@ -1398,14 +1422,14 @@ int vput(vp1); continue; } - vput(vp); + if (vp != tvp) + vput(vp); vp = vp1; } -out: if (error == ENOTDIR) - printf("checkpath: .. not a directory\n"); - if (vp != NULL) + panic("checkpath: .. 
not a directory\n"); + if (vp != tvp) vput(vp); return (error); } Index: /usr/src/sys/ufs/ufs/ufs_extern.h =================================================================== --- /usr/src/sys/ufs/ufs/ufs_extern.h (revision 202614) +++ /usr/src/sys/ufs/ufs/ufs_extern.h (working copy) @@ -57,7 +57,7 @@ int ufs_bmap(struct vop_bmap_args *); int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *, struct buf *, int *, int *); int ufs_fhtovp(struct mount *, struct ufid *, struct vnode **); -int ufs_checkpath(ino_t, struct inode *, struct ucred *); +int ufs_checkpath(ino_t, ino_t, struct inode *, struct ucred *, ino_t *); void ufs_dirbad(struct inode *, doff_t, char *); int ufs_dirbadentry(struct vnode *, struct direct *, int); int ufs_dirempty(struct inode *, ino_t, struct ucred *); @@ -66,9 +66,11 @@ int ufs_extwrite(struct vop_write_args *); void ufs_makedirentry(struct inode *, struct componentname *, struct direct *); int ufs_direnter(struct vnode *, struct vnode *, struct direct *, - struct componentname *, struct buf *); + struct componentname *, struct buf *, int); int ufs_dirremove(struct vnode *, struct inode *, int, int); int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int); +int ufs_lookup_ino(struct vnode *, struct vnode **, struct componentname *, + ino_t *); int ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *); int ufs_inactive(struct vop_inactive_args *); int ufs_init(struct vfsconf *); @@ -81,19 +83,33 @@ vfs_root_t ufs_root; int ufs_uninit(struct vfsconf *); int ufs_vinit(struct mount *, struct vop_vector *, struct vnode **); +#include +SYSCTL_DECL(_vfs_ufs); + /* * Soft update function prototypes. */ int softdep_setup_directory_add(struct buf *, struct inode *, off_t, ino_t, struct buf *, int); -void softdep_change_directoryentry_offset(struct inode *, caddr_t, - caddr_t, caddr_t, int); +void softdep_change_directoryentry_offset(struct buf *, struct inode *, + caddr_t, caddr_t, caddr_t, int); void softdep_setup_remove(struct buf *,struct inode *, struct inode *, int); void softdep_setup_directory_change(struct buf *, struct inode *, struct inode *, ino_t, int); void softdep_change_linkcnt(struct inode *); void softdep_releasefile(struct inode *); int softdep_slowdown(struct vnode *); +void softdep_setup_create(struct inode *, struct inode *); +void softdep_setup_dotdot_link(struct inode *, struct inode *); +void softdep_setup_link(struct inode *, struct inode *); +void softdep_setup_mkdir(struct inode *, struct inode *); +void softdep_setup_rmdir(struct inode *, struct inode *); +void softdep_setup_unlink(struct inode *, struct inode *); +void softdep_revert_create(struct inode *, struct inode *); +void softdep_revert_dotdot_link(struct inode *, struct inode *); +void softdep_revert_link(struct inode *, struct inode *); +void softdep_revert_mkdir(struct inode *, struct inode *); +void softdep_revert_rmdir(struct inode *, struct inode *); /* * Flags to low-level allocation routines. 
The low 16-bits are reserved Index: /usr/src/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_vfsops.c (revision 202614) +++ /usr/src/sys/ufs/ffs/ffs_vfsops.c (working copy) @@ -79,7 +79,6 @@ static int ffs_reload(struct mount *, struct threa static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, ufs2_daddr_t); -static void ffs_oldfscompat_write(struct fs *, struct ufsmount *); static void ffs_ifree(struct ufsmount *ump, struct inode *ip); static vfs_init_t ffs_init; static vfs_uninit_t ffs_uninit; @@ -331,6 +330,7 @@ ffs_mount(struct mount *mp) MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); + fs->fs_mtime = time_second; fs->fs_clean = 0; if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { vn_finished_write(mp); @@ -898,6 +898,7 @@ ffs_mountfs(devvp, mp, td) */ bzero(fs->fs_fsmnt, MAXMNTLEN); strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN); + mp->mnt_stat.f_iosize = fs->fs_bsize; if( mp->mnt_flag & MNT_ROOTFS) { /* @@ -909,6 +910,7 @@ ffs_mountfs(devvp, mp, td) } if (ronly == 0) { + fs->fs_mtime = time_second; if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, cred)) != 0) { free(fs->fs_csp, M_UFSMNT); @@ -939,7 +941,6 @@ ffs_mountfs(devvp, mp, td) * This would all happen while the filesystem was busy/not * available, so would effectively be "atomic". */ - mp->mnt_stat.f_iosize = fs->fs_bsize; (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ @@ -1039,7 +1040,7 @@ ffs_oldfscompat_read(fs, ump, sblockloc) * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ -static void +void ffs_oldfscompat_write(fs, ump) struct fs *fs; struct ufsmount *ump; @@ -1134,6 +1135,7 @@ ffs_unmount(mp, mntflags) fs->fs_pendinginodes = 0; } UFS_UNLOCK(ump); + softdep_unmount(mp); if (fs->fs_ronly == 0) { fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1; error = ffs_sbupdate(ump, MNT_WAIT, 0); @@ -1575,16 +1577,6 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags) DIP_SET(ip, i_gen, ip->i_gen); } } - /* - * Ensure that uid and gid are correct. This is a temporary - * fix until fsck has been changed to do the update. - */ - if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */ - fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */ - ip->i_uid = ip->i_din1->di_ouid; /* XXX */ - ip->i_gid = ip->i_din1->di_ogid; /* XXX */ - } /* XXX */ - #ifdef MAC if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) { /* @@ -1728,6 +1720,8 @@ ffs_sbupdate(mp, waitfor, suspended) } fs->fs_fmod = 0; fs->fs_time = time_second; + if (fs->fs_flags & FS_DOSOFTDEP) + softdep_setup_sbupdate(mp, (struct fs *)bp->b_data, bp); bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); ffs_oldfscompat_write((struct fs *)bp->b_data, mp); if (suspended) @@ -1869,9 +1863,6 @@ ffs_bufwrite(struct buf *bp) } BO_UNLOCK(bp->b_bufobj); - /* Mark the buffer clean */ - bundirty(bp); - /* * If this buffer is marked for background writing and we * do not have to wait for it, make a copy and write the @@ -1912,9 +1903,16 @@ ffs_bufwrite(struct buf *bp) newbp->b_flags &= ~B_INVAL; #ifdef SOFTUPDATES - /* move over the dependencies */ - if (!LIST_EMPTY(&bp->b_dep)) - softdep_move_dependencies(bp, newbp); + /* + * Move over the dependencies. If there are rollbacks, + * leave the parent buffer dirtied as it will need to + * be written again. 
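* (A rollback means the background copy goes to disk with some updates
* reverted to their last safe values, so the original buffer still
* holds newer data and must not be marked clean.)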
+ */ + if (LIST_EMPTY(&bp->b_dep) || + softdep_move_dependencies(bp, newbp) == 0) + bundirty(bp); +#else + bundirty(bp); #endif /* @@ -1927,8 +1925,11 @@ ffs_bufwrite(struct buf *bp) */ bqrelse(bp); bp = newbp; - } + } else + /* Mark the buffer clean */ + bundirty(bp); + /* Let the normal bufwrite do the rest for us */ normal_write: return (bufwrite(bp)); Index: /usr/src/sys/ufs/ffs/ffs_softdep.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_softdep.c (revision 202614) +++ /usr/src/sys/ufs/ffs/ffs_softdep.c (working copy) @@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -130,10 +131,12 @@ softdep_setup_inomapdep(bp, ip, newinum) } void -softdep_setup_blkmapdep(bp, mp, newblkno) +softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags) struct buf *bp; struct mount *mp; ufs2_daddr_t newblkno; + int frags; + int oldfrags; { panic("softdep_setup_blkmapdep called"); @@ -403,31 +406,13 @@ softdep_get_depcounts(struct mount *mp, * These definitions need to be adapted to the system to which * this file is being ported. */ -/* - * malloc types defined for the softdep system. - */ -static MALLOC_DEFINE(M_PAGEDEP, "pagedep","File page dependencies"); -static MALLOC_DEFINE(M_INODEDEP, "inodedep","Inode dependencies"); -static MALLOC_DEFINE(M_NEWBLK, "newblk","New block allocation"); -static MALLOC_DEFINE(M_BMSAFEMAP, "bmsafemap","Block or frag allocated from cyl group map"); -static MALLOC_DEFINE(M_ALLOCDIRECT, "allocdirect","Block or frag dependency for an inode"); -static MALLOC_DEFINE(M_INDIRDEP, "indirdep","Indirect block dependencies"); -static MALLOC_DEFINE(M_ALLOCINDIR, "allocindir","Block dependency for an indirect block"); -static MALLOC_DEFINE(M_FREEFRAG, "freefrag","Previously used frag for an inode"); -static MALLOC_DEFINE(M_FREEBLKS, "freeblks","Blocks freed from an inode"); -static MALLOC_DEFINE(M_FREEFILE, "freefile","Inode deallocated"); -static MALLOC_DEFINE(M_DIRADD, "diradd","New directory entry"); -static MALLOC_DEFINE(M_MKDIR, "mkdir","New directory"); -static MALLOC_DEFINE(M_DIRREM, "dirrem","Directory entry deleted"); -static MALLOC_DEFINE(M_NEWDIRBLK, "newdirblk","Unclaimed new directory block"); -static MALLOC_DEFINE(M_SAVEDINO, "savedino","Saved inodes"); #define M_SOFTDEP_FLAGS (M_WAITOK | M_USE_RESERVE) #define D_PAGEDEP 0 #define D_INODEDEP 1 -#define D_NEWBLK 2 -#define D_BMSAFEMAP 3 +#define D_BMSAFEMAP 2 +#define D_NEWBLK 3 #define D_ALLOCDIRECT 4 #define D_INDIRDEP 5 #define D_ALLOCINDIR 6 @@ -438,8 +423,66 @@ softdep_get_depcounts(struct mount *mp, #define D_MKDIR 11 #define D_DIRREM 12 #define D_NEWDIRBLK 13 -#define D_LAST D_NEWDIRBLK +#define D_FREEWORK 14 +#define D_FREEDEP 15 +#define D_JADDREF 16 +#define D_JREMREF 17 +#define D_JMVREF 18 +#define D_JNEWBLK 19 +#define D_JFREEBLK 20 +#define D_JFREEFRAG 21 +#define D_JSEG 22 +#define D_JSEGDEP 23 +#define D_SBDEP 24 +#define D_LAST D_SBDEP +unsigned long dep_current[D_LAST + 1]; +unsigned long dep_total[D_LAST + 1]; + + +SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0, "soft updates stats"); +SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0, + "total dependencies allocated"); +SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0, + "current dependencies allocated"); + +#define SOFTDEP_TYPE(type, str, long) \ + static MALLOC_DEFINE(M_ ## type, #str, long); \ + SYSCTL_LONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD, \ + &dep_total[D_ ## type], 0, ""); \ + 
SYSCTL_LONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD, \ + &dep_current[D_ ## type], 0, ""); + +SOFTDEP_TYPE(PAGEDEP, pagedep, "File page dependencies"); +SOFTDEP_TYPE(INODEDEP, inodedep, "Inode dependencies"); +SOFTDEP_TYPE(BMSAFEMAP, bmsafemap, + "Block or frag allocated from cyl group map"); +SOFTDEP_TYPE(NEWBLK, newblk, "New block or frag allocation dependency"); +SOFTDEP_TYPE(ALLOCDIRECT, allocdirect, "Block or frag dependency for an inode"); +SOFTDEP_TYPE(INDIRDEP, indirdep, "Indirect block dependencies"); +SOFTDEP_TYPE(ALLOCINDIR, allocindir, "Block dependency for an indirect block"); +SOFTDEP_TYPE(FREEFRAG, freefrag, "Previously used frag for an inode"); +SOFTDEP_TYPE(FREEBLKS, freeblks, "Blocks freed from an inode"); +SOFTDEP_TYPE(FREEFILE, freefile, "Inode deallocated"); +SOFTDEP_TYPE(DIRADD, diradd, "New directory entry"); +SOFTDEP_TYPE(MKDIR, mkdir, "New directory"); +SOFTDEP_TYPE(DIRREM, dirrem, "Directory entry deleted"); +SOFTDEP_TYPE(NEWDIRBLK, newdirblk, "Unclaimed new directory block"); +SOFTDEP_TYPE(FREEWORK, freework, "free an inode block"); +SOFTDEP_TYPE(FREEDEP, freedep, "track a block free"); +SOFTDEP_TYPE(JADDREF, jaddref, "Journal inode ref add"); +SOFTDEP_TYPE(JREMREF, jremref, "Journal inode ref remove"); +SOFTDEP_TYPE(JMVREF, jmvref, "Journal inode ref move"); +SOFTDEP_TYPE(JNEWBLK, jnewblk, "Journal new block"); +SOFTDEP_TYPE(JFREEBLK, jfreeblk, "Journal free block"); +SOFTDEP_TYPE(JFREEFRAG, jfreefrag, "Journal free frag"); +SOFTDEP_TYPE(JSEG, jseg, "Journal segment"); +SOFTDEP_TYPE(JSEGDEP, jsegdep, "Journal segment complete"); +SOFTDEP_TYPE(SBDEP, sbdep, "Superblock write dependency"); + +static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes"); +static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations"); + /* * translate from workitem type to memory type * MUST match the defines above, such that memtype[D_XXX] == M_XXX @@ -447,8 +490,8 @@ softdep_get_depcounts(struct mount *mp, static struct malloc_type *memtype[] = { M_PAGEDEP, M_INODEDEP, + M_BMSAFEMAP, M_NEWBLK, - M_BMSAFEMAP, M_ALLOCDIRECT, M_INDIRDEP, M_ALLOCINDIR, @@ -458,7 +501,18 @@ static struct malloc_type *memtype[] = { M_DIRADD, M_MKDIR, M_DIRREM, - M_NEWDIRBLK + M_NEWDIRBLK, + M_FREEWORK, + M_FREEDEP, + M_JADDREF, + M_JREMREF, + M_JMVREF, + M_JNEWBLK, + M_JFREEBLK, + M_JFREEFRAG, + M_JSEG, + M_JSEGDEP, + M_SBDEP }; #define DtoM(type) (memtype[type]) @@ -467,17 +521,21 @@ static struct malloc_type *memtype[] = { * Names of malloc types. */ #define TYPENAME(type) \ - ((unsigned)(type) < D_LAST ? memtype[type]->ks_shortdesc : "???") + ((unsigned)(type) <= D_LAST ? memtype[type]->ks_shortdesc : "???") /* * End system adaptation definitions. */ +#define DOTDOT_OFFSET offsetof(struct dirtemplate, dotdot_ino) +#define DOT_OFFSET offsetof(struct dirtemplate, dot_ino) + /* * Forward declarations. */ struct inodedep_hashhead; struct newblk_hashhead; struct pagedep_hashhead; +struct bmsafemap_hashhead; /* * Internal function prototypes. 
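[Annotation: the SOFTDEP_TYPE() macro above collapses the old block of per-type MALLOC_DEFINE() lines into one definition per dependency type that also registers debug.softdep.total.<name> and debug.softdep.current.<name> sysctls over the dep_total[]/dep_current[] arrays. The accounting half of that pattern, extracted into a standalone userland sketch (counters only; the SYSCTL plumbing is omitted):

	#include <stdio.h>

	enum { D_PAGEDEP, D_INODEDEP, D_TYPES };

	static unsigned long dep_total[D_TYPES];	/* ever allocated */
	static unsigned long dep_current[D_TYPES];	/* currently live */

	static const char *dep_name[D_TYPES] = {
		[D_PAGEDEP]  = "pagedep",	/* cf. MALLOC_DEFINE tags */
		[D_INODEDEP] = "inodedep",
	};

	/* workitem_alloc()/workitem_free() bump these under the lk mutex. */
	static void
	dep_alloc(int type)
	{
		dep_total[type]++;
		dep_current[type]++;
	}

	static void
	dep_free(int type)
	{
		dep_current[type]--;
	}

	int
	main(void)
	{
		dep_alloc(D_PAGEDEP);
		dep_free(D_PAGEDEP);
		printf("%s: %lu allocated, %lu live\n", dep_name[D_PAGEDEP],
		    dep_total[D_PAGEDEP], dep_current[D_PAGEDEP]);
		return (0);
	}

End of annotation.]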
@@ -487,59 +545,170 @@ static void drain_output(struct vnode *); static struct buf *getdirtybuf(struct buf *, struct mtx *, int); static void clear_remove(struct thread *); static void clear_inodedeps(struct thread *); +static void unlinked_inodedep(struct mount *, struct inodedep *); +static void clear_unlinked_inodedep(struct inodedep *); +static struct inodedep *first_unlinked_inodedep(struct ufsmount *); static int flush_pagedep_deps(struct vnode *, struct mount *, struct diraddhd *); +static void free_pagedep(struct pagedep *); +static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t); static int flush_inodedep_deps(struct mount *, ino_t); static int flush_deplist(struct allocdirectlst *, int, int *); static int handle_written_filepage(struct pagedep *, struct buf *); +static int handle_written_sbdep(struct sbdep *, struct buf *); +static void initiate_write_sbdep(struct sbdep *); static void diradd_inode_written(struct diradd *, struct inodedep *); +static int handle_written_indirdep(struct indirdep *, struct buf *, + struct buf**); static int handle_written_inodeblock(struct inodedep *, struct buf *); -static void handle_allocdirect_partdone(struct allocdirect *); +static int handle_written_bmsafemap(struct bmsafemap *, struct buf *); +static void handle_written_jaddref(struct jaddref *, struct jseg *); +static void handle_written_jremref(struct jremref *, struct jseg *); +static void handle_written_jseg(struct jseg *, struct buf *); +static void handle_written_jnewblk(struct jnewblk *, struct jseg *); +static void handle_written_jfreeblk(struct jfreeblk *, struct jseg *); +static void handle_written_jfreefrag(struct jfreefrag *, struct jseg *); +static void complete_jseg(struct jseg *); +static void jseg_write(struct fs *, struct jblocks *, struct jseg *, + uint8_t *); +static void jaddref_write(struct jaddref *, uint8_t *); +static void jremref_write(struct jremref *, uint8_t *); +static void jmvref_write(struct jmvref *, uint8_t *); +static void jnewblk_write(struct jnewblk *, uint8_t *); +static void jfreeblk_write(struct jfreeblk *, uint8_t *); +static void jfreefrag_write(struct jfreefrag *, uint8_t *); +static inline void inoref_write(struct inoref *, struct jrefrec *); +static void handle_allocdirect_partdone(struct allocdirect *, + struct workhead *); +static void cancel_newblk(struct newblk *, struct workhead *); +static void indirdep_complete(struct indirdep *); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); +static void initiate_write_indirdep(struct indirdep*, struct buf *); static void handle_written_mkdir(struct mkdir *, int); +static void initiate_write_bmsafemap(struct bmsafemap *, struct buf *); static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *); static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *); static void handle_workitem_freefile(struct freefile *); static void handle_workitem_remove(struct dirrem *, struct vnode *); static struct dirrem *newdirrem(struct buf *, struct inode *, struct inode *, int, struct dirrem **); -static void free_diradd(struct diradd *); -static void free_allocindir(struct allocindir *, struct inodedep *); +static void cancel_indirdep(struct indirdep *, struct buf *, struct inodedep *, + struct freeblks *); +static void free_indirdep(struct indirdep *); +static void free_diradd(struct diradd *, struct workhead *); +static void merge_diradd(struct inodedep *, struct diradd *); +static void 
complete_diradd(struct diradd *); +static struct diradd *diradd_lookup(struct pagedep *, int); +static struct jremref *cancel_diradd_dotdot(struct inode *, struct dirrem *, + struct jremref *); +static struct jremref *cancel_mkdir_dotdot(struct inode *, struct dirrem *, + struct jremref *); +static void cancel_diradd(struct diradd *, struct dirrem *, struct jremref *, + struct jremref *, struct jremref *); +static void dirrem_journal(struct dirrem *, struct jremref *, struct jremref *, + struct jremref *); +static void cancel_allocindir(struct allocindir *, struct inodedep *, + struct freeblks *); +static void complete_mkdir(struct mkdir *); static void free_newdirblk(struct newdirblk *); -static int indir_trunc(struct freeblks *, ufs2_daddr_t, int, ufs_lbn_t, - ufs2_daddr_t *); -static void deallocate_dependencies(struct buf *, struct inodedep *); -static void free_allocdirect(struct allocdirectlst *, - struct allocdirect *, int); +static void free_jremref(struct jremref *); +static void free_jaddref(struct jaddref *); +static void free_jsegdep(struct jsegdep *); +static void free_jseg(struct jseg *); +static void free_jnewblk(struct jnewblk *); +static void free_jfreeblk(struct jfreeblk *); +static void free_jfreefrag(struct jfreefrag *); +static void free_freedep(struct freedep *); +static void journal_jremref(struct dirrem *, struct jremref *, + struct inodedep *); +static void cancel_jnewblk(struct jnewblk *, struct workhead *); +static int cancel_jaddref(struct jaddref *, struct inodedep *, + struct workhead *); +static void cancel_jfreefrag(struct jfreefrag *); +static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t); +static int deallocate_dependencies(struct buf *, struct inodedep *, + struct freeblks *); +static void free_newblk(struct newblk *); +static void cancel_allocdirect(struct allocdirectlst *, + struct allocdirect *, struct freeblks *, int); static int check_inode_unwritten(struct inodedep *); static int free_inodedep(struct inodedep *); +static void freework_freeblock(struct freework *); static void handle_workitem_freeblocks(struct freeblks *, int); +static void handle_complete_freeblocks(struct freeblks *); +static void handle_workitem_indirblk(struct freework *); +static void handle_written_freework(struct freework *); static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *); static void setup_allocindir_phase2(struct buf *, struct inode *, - struct allocindir *); + struct inodedep *, struct allocindir *, ufs_lbn_t); static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t, - ufs2_daddr_t); + ufs2_daddr_t, ufs_lbn_t); static void handle_workitem_freefrag(struct freefrag *); -static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long); +static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long, + ufs_lbn_t); static void allocdirect_merge(struct allocdirectlst *, struct allocdirect *, struct allocdirect *); -static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *); -static int newblk_find(struct newblk_hashhead *, struct fs *, ufs2_daddr_t, - struct newblk **); -static int newblk_lookup(struct fs *, ufs2_daddr_t, int, struct newblk **); +static struct freefrag *allocindir_merge(struct allocindir *, + struct allocindir *); +static int bmsafemap_find(struct bmsafemap_hashhead *, struct mount *, int, + struct bmsafemap **); +static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *, + int cg); +static int newblk_find(struct newblk_hashhead *, struct mount *, 
ufs2_daddr_t, + int, struct newblk **); +static int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **); static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t, struct inodedep **); static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **); -static int pagedep_lookup(struct inode *, ufs_lbn_t, int, struct pagedep **); +static int pagedep_lookup(struct mount *, ino_t, ufs_lbn_t, int, + struct pagedep **); static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t, struct mount *mp, int, struct pagedep **); static void pause_timer(void *); static int request_cleanup(struct mount *, int); static int process_worklist_item(struct mount *, int); -static void add_to_worklist(struct worklist *); +static void process_removes(struct vnode *); +static void jwork_move(struct workhead *, struct workhead *); +static void add_to_worklist(struct worklist *, int); +static void remove_from_worklist(struct worklist *); static void softdep_flush(void); static int softdep_speedup(void); +static void worklist_speedup(void); +static int journal_mount(struct mount *, struct fs *, struct ucred *); +static void journal_unmount(struct mount *); +static int journal_space(struct ufsmount *, int); +static void journal_suspend(struct ufsmount *); +static void softdep_prelink(struct vnode *, struct vnode *); +static void add_to_journal(struct worklist *); +static void remove_from_journal(struct worklist *); +static void softdep_process_journal(struct mount *, int); +static struct jremref *newjremref(struct dirrem *, struct inode *, + struct inode *ip, off_t, nlink_t); +static struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t, + uint16_t); +static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t, + uint16_t); +static inline struct jsegdep *inoref_segattach(struct inoref *, struct jseg *); +static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t); +static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t, + ufs2_daddr_t, int); +static struct jfreefrag *newjfreefrag(struct freefrag *, struct inode *, + ufs2_daddr_t, long, ufs_lbn_t); +static struct freework *newfreework(struct freeblks *, struct freework *, + ufs_lbn_t, ufs2_daddr_t, int, int); +static void jwait(struct worklist *wk); +static struct inodedep *inodedep_lookup_ip(struct inode *); +static int bmsafemap_rollbacks(struct bmsafemap *); +static struct freefile *handle_bufwait(struct inodedep *, struct workhead *); +static void handle_jwork(struct workhead *); +static struct mkdir *setup_newdir(struct diradd *, ino_t, ino_t, struct buf *, + struct mkdir **); +static struct jblocks *jblocks_create(void); +static ufs2_daddr_t jblocks_alloc(struct jblocks *, int, int *); +static void jblocks_free(struct jblocks *, struct mount *, int); +static void jblocks_destroy(struct jblocks *); +static void jblocks_add(struct jblocks *, ufs2_daddr_t, int); /* * Exported softdep operations. 
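[Annotation: among the new statics declared just above, the jblocks_*() family (create, add, alloc, free, destroy) manages the pool of disk blocks backing the journal. The prototypes alone do not show how it works; purely as a mental model, and an assumption rather than the patch's implementation, one can picture a circular allocator over donated extents:

	/* Hypothetical, simplified stand-in for struct jblocks. */
	struct jext {
		long	start;		/* first block of a donated extent */
		int	blocks;		/* number of blocks in the extent */
	};

	struct jblocks_sketch {
		struct jext *exts;	/* filled by a jblocks_add() analogue */
		int	nexts;
		int	head;		/* extent the next allocation uses */
		int	off;		/* offset within that extent */
	};

	/* Hand out up to "want" contiguous blocks, wrapping circularly. */
	static long
	jblocks_sketch_alloc(struct jblocks_sketch *jb, int want, int *got)
	{
		struct jext *je = &jb->exts[jb->head];
		long blk = je->start + jb->off;

		*got = want;
		if (jb->off + want > je->blocks)
			*got = je->blocks - jb->off;	/* clip at extent end */
		jb->off += *got;
		if (jb->off == je->blocks) {		/* exhausted: wrap */
			jb->off = 0;
			jb->head = (jb->head + 1) % jb->nexts;
		}
		return (blk);
	}

End of annotation.]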
@@ -572,40 +741,128 @@ MTX_SYSINIT(softdep_lock, &lk, "Softdep Lock", MTX (item)->wk_state &= ~ONWORKLIST; \ LIST_REMOVE(item, wk_list); \ } while (0) +#define WORKLIST_INSERT_UNLOCKED WORKLIST_INSERT +#define WORKLIST_REMOVE_UNLOCKED WORKLIST_REMOVE + #else /* DEBUG */ -static void worklist_insert(struct workhead *, struct worklist *); -static void worklist_remove(struct worklist *); +static void worklist_insert(struct workhead *, struct worklist *, int); +static void worklist_remove(struct worklist *, int); -#define WORKLIST_INSERT(head, item) worklist_insert(head, item) -#define WORKLIST_REMOVE(item) worklist_remove(item) +#define WORKLIST_INSERT(head, item) worklist_insert(head, item, 1) +#define WORKLIST_INSERT_UNLOCKED(head, item) worklist_insert(head, item, 0) +#define WORKLIST_REMOVE(item) worklist_remove(item, 1) +#define WORKLIST_REMOVE_UNLOCKED(item) worklist_remove(item, 0) static void -worklist_insert(head, item) +worklist_insert(head, item, locked) struct workhead *head; struct worklist *item; + int locked; { - mtx_assert(&lk, MA_OWNED); + if (locked) + mtx_assert(&lk, MA_OWNED); if (item->wk_state & ONWORKLIST) - panic("worklist_insert: already on list"); + panic("worklist_insert: %p %s(0x%X) already on list", + item, TYPENAME(item->wk_type), item->wk_state); item->wk_state |= ONWORKLIST; LIST_INSERT_HEAD(head, item, wk_list); } static void -worklist_remove(item) +worklist_remove(item, locked) struct worklist *item; + int locked; { - mtx_assert(&lk, MA_OWNED); + if (locked) + mtx_assert(&lk, MA_OWNED); if ((item->wk_state & ONWORKLIST) == 0) - panic("worklist_remove: not on list"); + panic("worklist_remove: %p %s(0x%X) not on list", + item, TYPENAME(item->wk_type), item->wk_state); item->wk_state &= ~ONWORKLIST; LIST_REMOVE(item, wk_list); } #endif /* DEBUG */ /* + * Merge two jsegdeps keeping only the oldest one as newer references + * can't be discarded until after older references. + */ +static inline struct jsegdep * +jsegdep_merge(struct jsegdep *one, struct jsegdep *two) +{ + struct jsegdep *swp; + + if (two == NULL) + return (one); + + if (one->jd_seg->js_seq > two->jd_seg->js_seq) { + swp = one; + one = two; + two = swp; + } + WORKLIST_REMOVE(&two->jd_list); + free_jsegdep(two); + + return (one); +} + +/* + * If two freedeps are compatible free one to reduce list size. + */ +static inline struct freedep * +freedep_merge(struct freedep *one, struct freedep *two) +{ + if (two == NULL) + return (one); + + if (one->fd_freework == two->fd_freework) { + WORKLIST_REMOVE(&two->fd_list); + free_freedep(two); + } + return (one); +} + +/* + * Move journal work from one list to another. Duplicate freedeps and + * jsegdeps are coalesced to keep the lists as small as possible. 
+ */ +static void +jwork_move(dst, src) + struct workhead *dst; + struct workhead *src; +{ + struct freedep *freedep; + struct jsegdep *jsegdep; + struct worklist *wkn; + struct worklist *wk; + + KASSERT(dst != src, + ("jwork_move: dst == src")); + freedep = NULL; + jsegdep = NULL; + LIST_FOREACH_SAFE(wk, dst, wk_list, wkn) { + if (wk->wk_type == D_JSEGDEP) + jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep); + if (wk->wk_type == D_FREEDEP) + freedep = freedep_merge(WK_FREEDEP(wk), freedep); + } + + mtx_assert(&lk, MA_OWNED); + while ((wk = LIST_FIRST(src)) != NULL) { + WORKLIST_REMOVE(wk); + WORKLIST_INSERT(dst, wk); + if (wk->wk_type == D_JSEGDEP) { + jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep); + continue; + } + if (wk->wk_type == D_FREEDEP) + freedep = freedep_merge(WK_FREEDEP(wk), freedep); + } +} + +/* * Routines for tracking and managing workitems. */ static void workitem_free(struct worklist *, int); @@ -623,13 +880,16 @@ workitem_free(item, type) #ifdef DEBUG if (item->wk_state & ONWORKLIST) - panic("workitem_free: still on list"); + panic("workitem_free: %s(0x%X) still on list", + TYPENAME(item->wk_type), item->wk_state); if (item->wk_type != type) - panic("workitem_free: type mismatch"); + panic("workitem_free: type mismatch %s != %s", + TYPENAME(item->wk_type), TYPENAME(type)); #endif ump = VFSTOUFS(item->wk_mp); if (--ump->softdep_deps == 0 && ump->softdep_req) wakeup(&ump->softdep_deps); + dep_current[type]--; free(item, DtoM(type)); } @@ -643,6 +903,8 @@ workitem_alloc(item, type, mp) item->wk_mp = mp; item->wk_state = 0; ACQUIRE_LOCK(&lk); + dep_current[type]++; + dep_total[type]++; VFSTOUFS(mp)->softdep_deps++; VFSTOUFS(mp)->softdep_accdeps++; FREE_LOCK(&lk); @@ -679,23 +941,38 @@ static int stat_inode_bitmap; /* bufs redirtied as static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */ static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */ -SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, maxindirdeps, CTLFLAG_RW, &maxindirdeps, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, worklist_push, CTLFLAG_RW, &stat_worklist_push, 0,""); -SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,""); -SYSCTL_INT(_debug, OID_AUTO, ino_limit_push, CTLFLAG_RW, &stat_ino_limit_push, 0,""); -SYSCTL_INT(_debug, OID_AUTO, blk_limit_hit, CTLFLAG_RW, &stat_blk_limit_hit, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, ino_limit_hit, CTLFLAG_RW, &stat_ino_limit_hit, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, sync_limit_hit, CTLFLAG_RW, &stat_sync_limit_hit, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, ""); -/* SYSCTL_INT(_debug, OID_AUTO, worklist_num, CTLFLAG_RD, &softdep_on_worklist, 0, ""); */ +SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW, + &max_softdeps, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, tickdelay, CTLFLAG_RW, + &tickdelay, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, maxindirdeps, CTLFLAG_RW, + &maxindirdeps, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW, + &stat_worklist_push, 0,""); +SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW, + 
&stat_blk_limit_push, 0,""); +SYSCTL_INT(_debug_softdep, OID_AUTO, ino_limit_push, CTLFLAG_RW, + &stat_ino_limit_push, 0,""); +SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_hit, CTLFLAG_RW, + &stat_blk_limit_hit, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, ino_limit_hit, CTLFLAG_RW, + &stat_ino_limit_hit, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, sync_limit_hit, CTLFLAG_RW, + &stat_sync_limit_hit, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, + &stat_indir_blk_ptrs, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, inode_bitmap, CTLFLAG_RW, + &stat_inode_bitmap, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, + &stat_direct_blk_ptrs, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, dir_entry, CTLFLAG_RW, + &stat_dir_entry, 0, ""); SYSCTL_DECL(_vfs_ffs); +LIST_HEAD(bmsafemap_hashhead, bmsafemap) *bmsafemap_hashtbl; +static u_long bmsafemap_hash; /* size of hash table - 1 */ + static int compute_summary_at_mount = 0; /* Whether to recompute the summary at mount time */ SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW, &compute_summary_at_mount, 0, "Recompute summary at mount"); @@ -770,16 +1047,22 @@ softdep_flush(void) } } -static int -softdep_speedup(void) +static void +worklist_speedup(void) { - mtx_assert(&lk, MA_OWNED); if (req_pending == 0) { req_pending = 1; wakeup(&req_pending); } +} +static int +softdep_speedup(void) +{ + + worklist_speedup(); + bd_speedup(); return speedup_syncer(); } @@ -791,15 +1074,17 @@ softdep_flush(void) * and does so in order from first to last. */ static void -add_to_worklist(wk) +add_to_worklist(wk, nodelay) struct worklist *wk; + int nodelay; { struct ufsmount *ump; mtx_assert(&lk, MA_OWNED); ump = VFSTOUFS(wk->wk_mp); if (wk->wk_state & ONWORKLIST) - panic("add_to_worklist: already on list"); + panic("add_to_worklist: %s(0x%X) already on list", + TYPENAME(wk->wk_type), wk->wk_state); wk->wk_state |= ONWORKLIST; if (LIST_EMPTY(&ump->softdep_workitem_pending)) LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list); @@ -807,9 +1092,33 @@ static void LIST_INSERT_AFTER(ump->softdep_worklist_tail, wk, wk_list); ump->softdep_worklist_tail = wk; ump->softdep_on_worklist += 1; + if (nodelay) + worklist_speedup(); } /* + * Remove the item to be processed. If we are removing the last + * item on the list, we need to recalculate the tail pointer. + */ +static void +remove_from_worklist(wk) + struct worklist *wk; +{ + struct ufsmount *ump; + struct worklist *wkend; + + ump = VFSTOUFS(wk->wk_mp); + WORKLIST_REMOVE(wk); + if (wk == ump->softdep_worklist_tail) { + LIST_FOREACH(wkend, &ump->softdep_workitem_pending, wk_list) + if (LIST_NEXT(wkend, wk_list) == NULL) + break; + ump->softdep_worklist_tail = wkend; + } + ump->softdep_on_worklist -= 1; +} + +/* * Process that runs once per second to handle items in the background queue. * * Note that we ensure that everything is done in the order in which they @@ -838,8 +1147,9 @@ softdep_process_worklist(mp, full) ACQUIRE_LOCK(&lk); loopcount = 1; starttime = time_second; + softdep_process_journal(mp, full?MNT_WAIT:0); while (ump->softdep_on_worklist > 0) { - if ((cnt = process_worklist_item(mp, 0)) == -1) + if ((cnt = process_worklist_item(mp, LK_NOWAIT)) == -1) break; else matchcnt += cnt; @@ -871,16 +1181,61 @@ softdep_process_worklist(mp, full) * second. Otherwise the other mountpoints may get * excessively backlogged. 
*/ - if (!full && starttime != time_second) { - matchcnt = -1; + if (!full && starttime != time_second) break; - } } FREE_LOCK(&lk); return (matchcnt); } /* + * Process all removes associated with a vnode if we are running out of + * journal space. Any other process which attempts to flush these will + * be unable as we have the vnodes locked. + */ +static void +process_removes(vp) + struct vnode *vp; +{ + struct inodedep *inodedep; + struct dirrem *dirrem; + struct mount *mp; + ino_t inum; + + mtx_assert(&lk, MA_OWNED); + + mp = vp->v_mount; + inum = VTOI(vp)->i_number; + for (;;) { + if (inodedep_lookup(mp, inum, 0, &inodedep) == 0) + return; + LIST_FOREACH(dirrem, &inodedep->id_dirremhd, dm_inonext) + if ((dirrem->dm_state & (COMPLETE | ONWORKLIST)) == + (COMPLETE | ONWORKLIST)) + break; + if (dirrem == NULL) + return; + /* + * If another thread is trying to lock this vnode it will + * fail but we must wait for it to do so before we can + * proceed. + */ + if (dirrem->dm_state & INPROGRESS) { + dirrem->dm_state |= IOWAITING; + msleep(&dirrem->dm_list, &lk, PVM, "pwrwait", 0); + continue; + } + remove_from_worklist(&dirrem->dm_list); + FREE_LOCK(&lk); + if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) + panic("process_removes: suspended filesystem"); + handle_workitem_remove(dirrem, vp); + vn_finished_secondary_write(mp); + ACQUIRE_LOCK(&lk); + } +} + +/* * Process one item on the worklist. */ static int @@ -888,7 +1243,7 @@ process_worklist_item(mp, flags) struct mount *mp; int flags; { - struct worklist *wk, *wkend; + struct worklist *wk, *wkXXX; struct ufsmount *ump; struct vnode *vp; int matchcnt = 0; @@ -908,11 +1263,14 @@ process_worklist_item(mp, flags) * inodes, we have to skip over any dirrem requests whose * vnodes are resident and locked. */ + vp = NULL; ump = VFSTOUFS(mp); - vp = NULL; LIST_FOREACH(wk, &ump->softdep_workitem_pending, wk_list) { - if (wk->wk_state & INPROGRESS) + if (wk->wk_state & INPROGRESS) { + wkXXX = wk; continue; + } + wkXXX = wk; /* Record the last valid wk pointer. */ if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM) break; wk->wk_state |= INPROGRESS; @@ -921,6 +1279,10 @@ process_worklist_item(mp, flags) ffs_vgetf(mp, WK_DIRREM(wk)->dm_oldinum, LK_NOWAIT | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ); ACQUIRE_LOCK(&lk); + if (wk->wk_state & IOWAITING) { + wk->wk_state &= ~IOWAITING; + wakeup(wk); + } wk->wk_state &= ~INPROGRESS; ump->softdep_on_worklist_inprogress--; if (vp != NULL) @@ -928,21 +1290,7 @@ process_worklist_item(mp, flags) } if (wk == 0) return (-1); - /* - * Remove the item to be processed. If we are removing the last - * item on the list, we need to recalculate the tail pointer. - * As this happens rarely and usually when the list is short, - * we just run down the list to find it rather than tracking it - * in the above loop. 
- */ - WORKLIST_REMOVE(wk); - if (wk == ump->softdep_worklist_tail) { - LIST_FOREACH(wkend, &ump->softdep_workitem_pending, wk_list) - if (LIST_NEXT(wkend, wk_list) == NULL) - break; - ump->softdep_worklist_tail = wkend; - } - ump->softdep_on_worklist -= 1; + remove_from_worklist(wk); FREE_LOCK(&lk); if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) panic("process_worklist_item: suspended filesystem"); @@ -952,6 +1300,8 @@ process_worklist_item(mp, flags) case D_DIRREM: /* removal of a directory entry */ handle_workitem_remove(WK_DIRREM(wk), vp); + if (vp) + vput(vp); break; case D_FREEBLKS: @@ -969,6 +1319,11 @@ process_worklist_item(mp, flags) handle_workitem_freefile(WK_FREEFILE(wk)); break; + case D_FREEWORK: + /* Final block in an indirect was freed. */ + handle_workitem_indirblk(WK_FREEWORK(wk)); + break; + default: panic("%s_process_worklist: Unknown type %s", "softdep", TYPENAME(wk->wk_type)); @@ -982,19 +1337,22 @@ process_worklist_item(mp, flags) /* * Move dependencies from one buffer to another. */ -void +int softdep_move_dependencies(oldbp, newbp) struct buf *oldbp; struct buf *newbp; { struct worklist *wk, *wktail; + int dirty; - if (!LIST_EMPTY(&newbp->b_dep)) - panic("softdep_move_dependencies: need merge code"); - wktail = 0; + dirty = 0; + wktail = NULL; ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) { LIST_REMOVE(wk, wk_list); + if (wk->wk_type == D_BMSAFEMAP && + bmsafemap_rollbacks(WK_BMSAFEMAP(wk))) + dirty = 1; if (wktail == 0) LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list); else @@ -1002,6 +1360,8 @@ softdep_move_dependencies(oldbp, newbp) wktail = wk; } FREE_LOCK(&lk); + + return (dirty); } /* @@ -1198,23 +1558,22 @@ pagedep_find(pagedephd, ino, lbn, mp, flags, paged * This routine must be called with splbio interrupts blocked. 
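*/
/*
 * [Editor's note -- illustrative sketch, not part of the quoted patch.]
 * pagedep_lookup() below (and newblk_lookup() after it) use the standard
 * "search, unlock, allocate, relock, search again" idiom so the allocation
 * can sleep without holding the softdep lock; if another thread created the
 * entry while the lock was dropped, the fresh allocation is discarded.  A
 * userland pthread model of the idiom; the bucket, types and names here are
 * all invented and error handling is elided.  The quoted patch resumes after
 * the sketch.
 */
#include <pthread.h>
#include <stdlib.h>

struct entry { int key; struct entry *next; };
static struct entry *table;		/* single bucket for brevity */
static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;

static struct entry *
lookup(int key, int alloc)		/* called with lk held */
{
	struct entry *e, *new;

	for (e = table; e != NULL; e = e->next)
		if (e->key == key)
			return (e);
	if (!alloc)
		return (NULL);
	/* Drop the lock across the allocation, then re-check. */
	pthread_mutex_unlock(&lk);
	new = calloc(1, sizeof(*new));	/* may sleep in the kernel analogue */
	if (new == NULL)
		abort();		/* error handling elided */
	pthread_mutex_lock(&lk);
	for (e = table; e != NULL; e = e->next)
		if (e->key == key) {	/* lost the race; discard ours */
			free(new);
			return (e);
		}
	new->key = key;
	new->next = table;
	table = new;
	return (new);
}

int
main(void)
{
	pthread_mutex_lock(&lk);
	struct entry *e = lookup(42, 1);	/* allocates */
	e = lookup(42, 1);			/* finds the same entry */
	pthread_mutex_unlock(&lk);
	return (e == NULL);
}
/* [the quoted patch resumes]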
*/ static int -pagedep_lookup(ip, lbn, flags, pagedeppp) - struct inode *ip; +pagedep_lookup(mp, ino, lbn, flags, pagedeppp) + struct mount *mp; + ino_t ino; ufs_lbn_t lbn; int flags; struct pagedep **pagedeppp; { struct pagedep *pagedep; struct pagedep_hashhead *pagedephd; - struct mount *mp; int ret; int i; mtx_assert(&lk, MA_OWNED); - mp = ITOV(ip)->v_mount; - pagedephd = PAGEDEP_HASH(mp, ip->i_number, lbn); + pagedephd = PAGEDEP_HASH(mp, ino, lbn); - ret = pagedep_find(pagedephd, ip->i_number, lbn, mp, flags, pagedeppp); + ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp); if (*pagedeppp || (flags & DEPALLOC) == 0) return (ret); FREE_LOCK(&lk); @@ -1222,12 +1581,12 @@ static int M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp); ACQUIRE_LOCK(&lk); - ret = pagedep_find(pagedephd, ip->i_number, lbn, mp, flags, pagedeppp); + ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp); if (*pagedeppp) { WORKITEM_FREE(pagedep, D_PAGEDEP); return (ret); } - pagedep->pd_ino = ip->i_number; + pagedep->pd_ino = ino; pagedep->pd_lbn = lbn; LIST_INIT(&pagedep->pd_dirremhd); LIST_INIT(&pagedep->pd_pendinghd); @@ -1314,10 +1673,13 @@ inodedep_lookup(mp, inum, flags, inodedeppp) inodedep->id_savedino1 = NULL; inodedep->id_savedsize = -1; inodedep->id_savedextsize = -1; - inodedep->id_buf = NULL; + inodedep->id_bmsafemap = NULL; + inodedep->id_mkdiradd = NULL; + LIST_INIT(&inodedep->id_dirremhd); LIST_INIT(&inodedep->id_pendinghd); LIST_INIT(&inodedep->id_inowait); LIST_INIT(&inodedep->id_bufwait); + TAILQ_INIT(&inodedep->id_inoreflst); TAILQ_INIT(&inodedep->id_inoupdt); TAILQ_INIT(&inodedep->id_newinoupdt); TAILQ_INIT(&inodedep->id_extupdt); @@ -1336,17 +1698,29 @@ u_long newblk_hash; /* size of hash table - 1 */ (&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash]) static int -newblk_find(newblkhd, fs, newblkno, newblkpp) +newblk_find(newblkhd, mp, newblkno, flags, newblkpp) struct newblk_hashhead *newblkhd; - struct fs *fs; + struct mount *mp; ufs2_daddr_t newblkno; + int flags; struct newblk **newblkpp; { struct newblk *newblk; - LIST_FOREACH(newblk, newblkhd, nb_hash) - if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs) - break; + LIST_FOREACH(newblk, newblkhd, nb_hash) { + if (newblkno != newblk->nb_newblkno) + continue; + if (mp != newblk->nb_list.wk_mp) + continue; + /* + * If we're creating a new dependency don't match those that + * have already been converted to allocdirects. This is for + * a frag extend. + */ + if ((flags & DEPALLOC) && newblk->nb_list.wk_type != D_NEWBLK) + continue; + break; + } if (newblk) { *newblkpp = newblk; return (1); @@ -1361,8 +1735,8 @@ static int * Found or allocated entry is returned in newblkpp. 
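*/
/*
 * [Editor's note -- illustrative sketch, not part of the quoted patch.]
 * The jblocks/jextent structures introduced just below keep the journal's
 * disk space as an array of extents: jblocks_add() coalesces runs that are
 * physically adjacent and doubles the array when it fills, and
 * jblocks_alloc() hands out a contiguous range, truncating the request at
 * an extent boundary so callers loop on short grants.  A standalone
 * userland model of both operations; all names are invented, a plain
 * int64_t stands in for ufs2_daddr_t, and error handling is elided.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ext { int64_t daddr; int blocks; };
struct extmap { struct ext *ext; int used; int avail; int head; int off; };

static void
extmap_add(struct extmap *m, int64_t daddr, int blocks)
{
	struct ext *e = &m->ext[m->used];

	if (e->daddr == 0) {			/* very first run */
		e->daddr = daddr;
		e->blocks = blocks;
	} else if (e->daddr + e->blocks == daddr) {
		e->blocks += blocks;		/* extend the last run */
	} else {
		if (++m->used == m->avail) {	/* grow: double the array */
			m->avail *= 2;
			m->ext = realloc(m->ext,
			    sizeof(*m->ext) * m->avail);
			memset(m->ext + m->used, 0,
			    sizeof(*m->ext) * (m->avail - m->used));
		}
		m->ext[m->used].daddr = daddr;
		m->ext[m->used].blocks = blocks;
	}
}

static int64_t
extmap_alloc(struct extmap *m, int blocks, int *granted)
{
	struct ext *e = &m->ext[m->head];
	int left = e->blocks - m->off;

	if (left == 0) {			/* wrap to the next extent */
		m->off = 0;
		if (++m->head > m->used)
			m->head = 0;
		e = &m->ext[m->head];
		left = e->blocks;
	}
	*granted = left < blocks ? left : blocks;
	m->off += *granted;
	return (e->daddr + m->off - *granted);
}

int
main(void)
{
	struct extmap m = { calloc(4, sizeof(struct ext)), 0, 4, 0, 0 };
	int got;

	extmap_add(&m, 100, 8);
	extmap_add(&m, 108, 8);		/* adjacent: coalesces to 100..115 */
	extmap_add(&m, 200, 4);		/* not adjacent: new extent */
	printf("%d extents\n", m.used + 1);		/* 2 */
	printf("blk %lld got %d\n",
	    (long long)extmap_alloc(&m, 20, &got), got);	/* 100, 16 */
	return (0);
}
/* [the quoted patch resumes]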
*/ static int -newblk_lookup(fs, newblkno, flags, newblkpp) - struct fs *fs; +newblk_lookup(mp, newblkno, flags, newblkpp) + struct mount *mp; ufs2_daddr_t newblkno; int flags; struct newblk **newblkpp; @@ -1370,21 +1744,25 @@ static int struct newblk *newblk; struct newblk_hashhead *newblkhd; - newblkhd = NEWBLK_HASH(fs, newblkno); - if (newblk_find(newblkhd, fs, newblkno, newblkpp)) + newblkhd = NEWBLK_HASH(VFSTOUFS(mp)->um_fs, newblkno); + if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp)) return (1); if ((flags & DEPALLOC) == 0) return (0); FREE_LOCK(&lk); - newblk = malloc(sizeof(struct newblk), - M_NEWBLK, M_SOFTDEP_FLAGS); + newblk = malloc(sizeof(union allblk), M_NEWBLK, + M_SOFTDEP_FLAGS | M_ZERO); + workitem_alloc(&newblk->nb_list, D_NEWBLK, mp); ACQUIRE_LOCK(&lk); - if (newblk_find(newblkhd, fs, newblkno, newblkpp)) { - free(newblk, M_NEWBLK); + if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp)) { + WORKITEM_FREE(newblk, D_NEWBLK); return (1); } - newblk->nb_state = 0; - newblk->nb_fs = fs; + newblk->nb_freefrag = NULL; + LIST_INIT(&newblk->nb_indirdeps); + LIST_INIT(&newblk->nb_newdirblk); + LIST_INIT(&newblk->nb_jwork); + newblk->nb_state = ATTACHED; newblk->nb_newblkno = newblkno; LIST_INSERT_HEAD(newblkhd, newblk, nb_hash); *newblkpp = newblk; @@ -1401,10 +1779,10 @@ softdep_initialize() LIST_INIT(&mkdirlisthd); max_softdeps = desiredvnodes * 4; - pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, - &pagedep_hash); + pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, &pagedep_hash); inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash); - newblk_hashtbl = hashinit(64, M_NEWBLK, &newblk_hash); + newblk_hashtbl = hashinit(desiredvnodes / 5, M_NEWBLK, &newblk_hash); + bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, &bmsafemap_hash); /* initialise bioops hack */ bioops.io_start = softdep_disk_io_initiation; @@ -1428,6 +1806,7 @@ softdep_uninitialize() hashdestroy(pagedep_hashtbl, M_PAGEDEP, pagedep_hash); hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash); hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash); + hashdestroy(bmsafemap_hashtbl, M_BMSAFEMAP, bmsafemap_hash); } /* @@ -1457,9 +1836,17 @@ softdep_mount(devvp, mp, fs, cred) MNT_IUNLOCK(mp); ump = VFSTOUFS(mp); LIST_INIT(&ump->softdep_workitem_pending); + LIST_INIT(&ump->softdep_journal_pending); + TAILQ_INIT(&ump->softdep_unlinked); ump->softdep_worklist_tail = NULL; ump->softdep_on_worklist = 0; ump->softdep_deps = 0; + if ((fs->fs_flags & FS_SUJ) && + (error = journal_mount(mp, fs, cred)) != 0) { + printf("fs->fs_flags 0x%X\n", fs->fs_flags); + printf("Failed to start journal: %d\n", error); + return (error); + } /* * When doing soft updates, the counters in the * superblock may have gotten out of sync. Recomputation @@ -1493,7 +1880,1872 @@ softdep_mount(devvp, mp, fs, cred) return (0); } +void +softdep_unmount(mp) + struct mount *mp; +{ + + if (mp->mnt_flag & MNT_SUJ) + journal_unmount(mp); +} + +struct jblocks { + struct jseglst jb_segs; /* TAILQ of current segments. */ + struct jseg *jb_writeseg; /* Next write to complete. */ + struct jextent *jb_extent; /* Extent array. */ + uint64_t jb_nextseq; /* Next sequence number. */ + uint64_t jb_oldestseq; /* Oldest active sequence number. */ + int jb_avail; /* Available extents. */ + int jb_used; /* Last used extent. */ + int jb_head; /* Allocator head. */ + int jb_off; /* Allocator extent offset. */ + int jb_blocks; /* Total disk blocks covered. */ + int jb_free; /* Total disk blocks free. 
*/ + int jb_min; /* Minimum free space. */ + int jb_low; /* Low on space. */ + int jb_age; /* Insertion time of oldest rec. */ + int jb_suspended; /* Did journal suspend writes? */ +}; + +struct jextent { + ufs2_daddr_t je_daddr; /* Disk block address. */ + int je_blocks; /* Disk block count. */ +}; + +static struct jblocks * +jblocks_create(void) +{ + struct jblocks *jblocks; + + jblocks = malloc(sizeof(*jblocks), M_JBLOCKS, M_WAITOK | M_ZERO); + TAILQ_INIT(&jblocks->jb_segs); + jblocks->jb_avail = 10; + jblocks->jb_extent = malloc(sizeof(struct jextent) * jblocks->jb_avail, + M_JBLOCKS, M_WAITOK | M_ZERO); + + return (jblocks); +} + +static ufs2_daddr_t +jblocks_alloc(jblocks, bytes, actual) + struct jblocks *jblocks; + int bytes; + int *actual; +{ + ufs2_daddr_t daddr; + struct jextent *jext; + int freecnt; + int blocks; + + blocks = bytes / DEV_BSIZE; + jext = &jblocks->jb_extent[jblocks->jb_head]; + freecnt = jext->je_blocks - jblocks->jb_off; + if (freecnt == 0) { + jblocks->jb_off = 0; + if (++jblocks->jb_head > jblocks->jb_used) + jblocks->jb_head = 0; + jext = &jblocks->jb_extent[jblocks->jb_head]; + freecnt = jext->je_blocks; + } + if (freecnt > blocks) + freecnt = blocks; + *actual = freecnt * DEV_BSIZE; + daddr = jext->je_daddr + jblocks->jb_off; + jblocks->jb_off += freecnt; + jblocks->jb_free -= freecnt; + + return (daddr); +} + +static void +jblocks_free(jblocks, mp, bytes) + struct jblocks *jblocks; + struct mount *mp; + int bytes; +{ + + jblocks->jb_free += bytes / DEV_BSIZE; + if (jblocks->jb_suspended) + worklist_speedup(); + wakeup(jblocks); +} + +static void +jblocks_destroy(jblocks) + struct jblocks *jblocks; +{ + + if (jblocks->jb_extent) + free(jblocks->jb_extent, M_JBLOCKS); + free(jblocks, M_JBLOCKS); +} + +static void +jblocks_add(jblocks, daddr, blocks) + struct jblocks *jblocks; + ufs2_daddr_t daddr; + int blocks; +{ + struct jextent *jext; + + jblocks->jb_blocks += blocks; + jblocks->jb_free += blocks; + jext = &jblocks->jb_extent[jblocks->jb_used]; + /* Adding the first block. */ + if (jext->je_daddr == 0) { + jext->je_daddr = daddr; + jext->je_blocks = blocks; + return; + } + /* Extending the last extent. */ + if (jext->je_daddr + jext->je_blocks == daddr) { + jext->je_blocks += blocks; + return; + } + /* Adding a new extent. */ + if (++jblocks->jb_used == jblocks->jb_avail) { + jblocks->jb_avail *= 2; + jext = malloc(sizeof(struct jextent) * jblocks->jb_avail, + M_JBLOCKS, M_WAITOK | M_ZERO); + memcpy(jext, jblocks->jb_extent, + sizeof(struct jextent) * jblocks->jb_used); + free(jblocks->jb_extent, M_JBLOCKS); + jblocks->jb_extent = jext; + } + jext = &jblocks->jb_extent[jblocks->jb_used]; + jext->je_daddr = daddr; + jext->je_blocks = blocks; + return; +} + /* + * Open and verify the journal file. + */ +static int +journal_mount(mp, fs, cred) + struct mount *mp; + struct fs *fs; + struct ucred *cred; +{ + struct jblocks *jblocks; + struct vnode *vp; + struct inode *ip; + ufs2_daddr_t blkno; + int bcount; + int error; + int i; + + mp->mnt_flag |= MNT_SUJ; + error = VFS_VGET(mp, fs->fs_sujournal, LK_EXCLUSIVE, &vp); + if (error) + return (error); + ip = VTOI(vp); + if (ip->i_size < SUJ_MIN) { + error = ENOSPC; + goto out; + } + bcount = lblkno(fs, ip->i_size); /* Only use whole blocks. 
*/ + jblocks = jblocks_create(); + for (i = 0; i < bcount; i++) { + error = ufs_bmaparray(vp, i, &blkno, NULL, NULL, NULL); + if (error) + break; + jblocks_add(jblocks, blkno, fsbtodb(fs, fs->fs_frag)); + } + if (error) { + jblocks_destroy(jblocks); + goto out; + } + jblocks->jb_low = jblocks->jb_free / 3; /* Reserve 33%. */ + jblocks->jb_min = jblocks->jb_free / 10; /* Suspend at 10%. */ + DIP_SET(ip, i_modrev, fs->fs_mtime); + ip->i_flags |= IN_MODIFIED; + ffs_update(vp, 1); + VFSTOUFS(mp)->softdep_jblocks = jblocks; +out: + vput(vp); + return (error); +} + +static void +journal_unmount(mp) + struct mount *mp; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(mp); + if (ump->softdep_jblocks) + jblocks_destroy(ump->softdep_jblocks); + ump->softdep_jblocks = NULL; +} + +/* + * Called when a journal record is ready to be written. Space is allocated + * and the journal entry is created when the journal is flushed to stable + * store. + */ +static void +add_to_journal(wk) + struct worklist *wk; +{ + struct ufsmount *ump; + + mtx_assert(&lk, MA_OWNED); + ump = VFSTOUFS(wk->wk_mp); + if (wk->wk_state & ONWORKLIST) + panic("add_to_journal: %s(0x%X) already on list", + TYPENAME(wk->wk_type), wk->wk_state); + wk->wk_state |= ONWORKLIST | DEPCOMPLETE; + if (LIST_EMPTY(&ump->softdep_journal_pending)) { + ump->softdep_jblocks->jb_age = ticks; + LIST_INSERT_HEAD(&ump->softdep_journal_pending, wk, wk_list); + } else + LIST_INSERT_AFTER(ump->softdep_journal_tail, wk, wk_list); + ump->softdep_journal_tail = wk; + ump->softdep_on_journal += 1; +} + +/* + * Remove an arbitrary item from the journal worklist and maintain the tail + * pointer. This happens when a new operation obviates the need to + * journal an old operation. + */ +static void +remove_from_journal(wk) + struct worklist *wk; +{ + struct ufsmount *ump; + + mtx_assert(&lk, MA_OWNED); + ump = VFSTOUFS(wk->wk_mp); +#ifdef DEBUG /* XXX Expensive, temporary. */ + { + struct worklist *wkn; + + LIST_FOREACH(wkn, &ump->softdep_journal_pending, wk_list) + if (wkn == wk) + break; + if (wkn == NULL) + panic("remove_from_journal: %p is not in journal", wk); + } +#endif + /* + * We emulate a TAILQ to save space in most structures which do not + * require TAILQ semantics. Here we must update the tail position + * when removing the tail which is not the final entry. + */ + if (ump->softdep_journal_tail == wk) + ump->softdep_journal_tail = + (struct worklist *)wk->wk_list.le_prev; + + WORKLIST_REMOVE(wk); + ump->softdep_on_journal -= 1; +} + +static int +journal_space(ump, thresh) + struct ufsmount *ump; + int thresh; +{ + struct jblocks *jblocks; + int avail; + + jblocks = ump->softdep_jblocks; + avail = (ump->softdep_on_journal * JREC_SIZE) / DEV_BSIZE; + avail = jblocks->jb_free - avail; + + return (avail > thresh); +} + +static void +journal_suspend(ump) + struct ufsmount *ump; +{ + struct jblocks *jblocks; + struct mount *mp; + + mp = UFSTOVFS(ump); + jblocks = ump->softdep_jblocks; + MNT_ILOCK(mp); + if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) { + mp->mnt_kern_flag |= MNTK_SUSPEND; + mp->mnt_susp_owner = FIRST_THREAD_IN_PROC(softdepproc); + } + jblocks->jb_suspended = 1; + MNT_IUNLOCK(mp); +} + +/* + * Called before any allocation function to be certain that there is + * sufficient space in the journal prior to creating any new records. + * Since in the case of block allocation we may have multiple locked + * buffers at the time of the actual allocation we can not block + * when the journal records are created. 
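*/
/*
 * [Editor's note -- worked example, not part of the quoted patch.]
 * journal_space() above charges every not-yet-flushed record
 * (softdep_on_journal * JREC_SIZE bytes) against the free block count
 * before comparing with a threshold.  The arithmetic below assumes
 * JREC_SIZE is 32 bytes, which agrees with the "20% of 1MB = 6553
 * records" figure in the comment that follows (0.2 * 1048576 / 32 = 6553).
 */
#include <stdio.h>

#define DEV_BSIZE	512
#define JREC_SIZE	32	/* assumed on-disk record size */

static int
journal_space(int on_journal, int jb_free, int thresh)
{
	/* Blocks the pending records will consume once flushed. */
	int pending = (on_journal * JREC_SIZE) / DEV_BSIZE;

	return (jb_free - pending > thresh);
}

int
main(void)
{
	/* 2048 free blocks, 4096 pending records, low-water mark 683. */
	printf("%d\n", journal_space(4096, 2048, 683));	/* 1: space ok */
	return (0);
}
/* [the quoted patch resumes]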
Doing so would create a deadlock + * if any of these buffers needed to be flushed to reclaim space. Instead + * we require a sufficiently large amount of available space such that + * each thread in the system could have passed this allocation check and + * still have sufficient free space. With 20% of a minimum journal size + * of 1MB we have 6553 records available. + */ +int +softdep_prealloc(vp, waitok) + struct vnode *vp; + int waitok; +{ + struct jblocks *jblocks; + struct ufsmount *ump; + + if (DOINGSUJ(vp) == 0) + return (0); + ump = VFSTOUFS(vp->v_mount); + jblocks = ump->softdep_jblocks; + ACQUIRE_LOCK(&lk); + if (journal_space(ump, jblocks->jb_low)) { + FREE_LOCK(&lk); + return (0); + } + FREE_LOCK(&lk); + if (waitok == MNT_NOWAIT) + return (ENOSPC); + /* + * Attempt to sync this vnode once to flush any journal + * work attached to it. + */ + ffs_syncvnode(vp, waitok); + ACQUIRE_LOCK(&lk); + process_removes(vp); + if (journal_space(ump, jblocks->jb_low) == 0) { + softdep_speedup(); + if (journal_space(ump, jblocks->jb_min) == 0) + journal_suspend(ump); + } + FREE_LOCK(&lk); + + return (0); +} + +static void +softdep_prelink(dvp, vp) + struct vnode *dvp; + struct vnode *vp; +{ + struct jblocks *jblocks; + struct ufsmount *ump; + + ump = VFSTOUFS(dvp->v_mount); + jblocks = ump->softdep_jblocks; + mtx_assert(&lk, MA_OWNED); + if (journal_space(ump, jblocks->jb_low)) + return; + FREE_LOCK(&lk); + if (vp) + ffs_syncvnode(vp, MNT_NOWAIT); + ffs_syncvnode(dvp, MNT_WAIT); + ACQUIRE_LOCK(&lk); + /* Process vp before dvp as it may create .. removes. */ + if (vp) + process_removes(vp); + process_removes(dvp); + softdep_speedup(); + process_worklist_item(UFSTOVFS(ump), LK_NOWAIT); + process_worklist_item(UFSTOVFS(ump), LK_NOWAIT); + if (journal_space(ump, jblocks->jb_low) == 0) { + softdep_speedup(); + if (journal_space(ump, jblocks->jb_min) == 0) + journal_suspend(ump); + } +} + +static void +jseg_write(fs, jblocks, jseg, data) + struct fs *fs; + struct jblocks *jblocks; + struct jseg *jseg; + uint8_t *data; +{ + struct jsegrec *rec; + + rec = (struct jsegrec *)data; + rec->jsr_seq = jseg->js_seq; + rec->jsr_oldest = jblocks->jb_oldestseq; + rec->jsr_cnt = jseg->js_cnt; + rec->jsr_crc = 0; + rec->jsr_time = fs->fs_mtime; +} + +static inline void +inoref_write(inoref, rec) + struct inoref *inoref; + struct jrefrec *rec; +{ + rec->jr_ino = inoref->if_ino; + rec->jr_parent = inoref->if_parent; + rec->jr_nlink = inoref->if_nlink; + rec->jr_mode = inoref->if_mode; + rec->jr_diroff = inoref->if_diroff; +} + +static void +jaddref_write(jaddref, data) + struct jaddref *jaddref; + uint8_t *data; +{ + struct jrefrec *rec; + + rec = (struct jrefrec *)data; + rec->jr_op = JOP_ADDREF; + inoref_write(&jaddref->ja_ref, rec); +} + +static void +jremref_write(jremref, data) + struct jremref *jremref; + uint8_t *data; +{ + struct jrefrec *rec; + + rec = (struct jrefrec *)data; + rec->jr_op = JOP_REMREF; + inoref_write(&jremref->jr_ref, rec); +} + +static void +jmvref_write(jmvref, data) + struct jmvref *jmvref; + uint8_t *data; +{ + struct jmvrec *rec; + + rec = (struct jmvrec *)data; + rec->jm_op = JOP_MVREF; + rec->jm_ino = jmvref->jm_ino; + rec->jm_parent = jmvref->jm_parent; + rec->jm_oldoff = jmvref->jm_oldoff; + rec->jm_newoff = jmvref->jm_newoff; +} + +static void +jnewblk_write(jnewblk, data) + struct jnewblk *jnewblk; + uint8_t *data; +{ + struct jblkrec *rec; + + rec = (struct jblkrec *)data; + rec->jb_op = JOP_NEWBLK; + rec->jb_ino = jnewblk->jn_ino; + rec->jb_blkno = jnewblk->jn_blkno; + 
rec->jb_lbn = jnewblk->jn_lbn; + rec->jb_frags = jnewblk->jn_frags; + rec->jb_oldfrags = jnewblk->jn_oldfrags; +} + +static void +jfreeblk_write(jfreeblk, data) + struct jfreeblk *jfreeblk; + uint8_t *data; +{ + struct jblkrec *rec; + + rec = (struct jblkrec *)data; + rec->jb_op = JOP_FREEBLK; + rec->jb_ino = jfreeblk->jf_ino; + rec->jb_blkno = jfreeblk->jf_blkno; + rec->jb_lbn = jfreeblk->jf_lbn; + rec->jb_frags = jfreeblk->jf_frags; + rec->jb_oldfrags = 0; +} + +static void +jfreefrag_write(jfreefrag, data) + struct jfreefrag *jfreefrag; + uint8_t *data; +{ + struct jblkrec *rec; + + rec = (struct jblkrec *)data; + rec->jb_op = JOP_FREEBLK; + rec->jb_ino = jfreefrag->fr_ino; + rec->jb_blkno = jfreefrag->fr_blkno; + rec->jb_lbn = jfreefrag->fr_lbn; + rec->jb_frags = jfreefrag->fr_frags; + rec->jb_oldfrags = 0; +} + +/* + * Flush some journal records to disk. + */ +static void +softdep_process_journal(mp, flags) + struct mount *mp; + int flags; +{ + struct jblocks *jblocks; + struct ufsmount *ump; + struct worklist *wk; + struct jseg *jseg; + struct buf *bp; + uint8_t *data; + struct fs *fs; + int segwritten; + int jrecmin; /* Minimum write size. */ + int jrecmax; /* Maximum write size. */ + int size; + int cnt; + + if ((mp->mnt_flag & MNT_SUJ) == 0) + return; + ump = VFSTOUFS(mp); + fs = ump->um_fs; + jblocks = ump->softdep_jblocks; + /* + * We write anywhere between a disk block and fs block. The upper + * bound is picked to prevent buffer cache fragmentation and limit + * processing time per I/O. + */ + jrecmax = fs->fs_bsize / JREC_SIZE; + jrecmin = DEV_BSIZE / JREC_SIZE; + segwritten = 0; + while ((cnt = ump->softdep_on_journal) != 0) { + /* + * Create a new segment to hold as many as 'cnt' journal + * entries and add them to the segment. Notice cnt is + * off by one to account for the space required by the + * jsegrec. If we don't have a full block to log skip it + * unless we haven't written anything in 10 seconds. + */ + cnt++; + if (cnt < jrecmax) { + if (segwritten) + return; + if (flags != MNT_WAIT && + (ticks - jblocks->jb_age) > hz*10) + break; + } + /* + * Verify some free journal space. softdep_prealloc() should + * guarantee that we don't run out so this is indicative of + * a problem with the flow control. Try to recover + * gracefully in any event. + */ + while (jblocks->jb_free == 0) { + if (flags != MNT_WAIT) + break; + printf("softdep: Out of journal space!\n"); + softdep_speedup(); + msleep(jblocks, &lk, PRIBIO, "jblocks", 1); + } + FREE_LOCK(&lk); + jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS); + workitem_alloc(&jseg->js_list, D_JSEG, mp); + LIST_INIT(&jseg->js_entries); + jseg->js_state = ATTACHED; + jseg->js_refs = 1; /* Self reference. */ + jseg->js_jblocks = jblocks; + size = roundup2(cnt * JREC_SIZE, DEV_BSIZE); + bp = geteblk(fs->fs_bsize, 0); + ACQUIRE_LOCK(&lk); + /* + * If there was a race while we were allocating the block + * and jseg the entry we care about was likely written. + * We bail out in both the WAIT and NOWAIT case and assume + * the caller will loop if the entry it cares about is + * not written. + */ + if (ump->softdep_on_journal == 0 || jblocks->jb_free == 0) { + bp->b_flags |= B_INVAL | B_NOCACHE; + WORKITEM_FREE(jseg, D_JSEG); + FREE_LOCK(&lk); + brelse(bp); + ACQUIRE_LOCK(&lk); + break; + } + /* + * Calculate the disk block size required for the available + * records rounded to the min size. 
+ */ + cnt = ump->softdep_on_journal + 1; + if (cnt < jrecmax) + cnt = roundup2(cnt, jrecmin); + else + cnt = jrecmax; + size = cnt * JREC_SIZE; + /* + * Allocate a disk block for this journal data and account + * for truncation of the requested size if enough contiguous + * space was not available. + */ + bp->b_blkno = bp->b_lblkno = jblocks_alloc(jblocks, size, + &size); + bp->b_offset = bp->b_blkno * DEV_BSIZE; + bp->b_bcount = size; + bp->b_bufobj = &ump->um_devvp->v_bufobj; + bp->b_flags &= ~B_INVAL; + /* + * Initialize our jseg with as many as cnt - 1 records. + * Assign the next sequence number to it and link it + * in-order. + */ + cnt = MIN(ump->softdep_on_journal, (size / JREC_SIZE) - 1); + jseg->js_buf = bp; + jseg->js_cnt = cnt; + jseg->js_size = size; + jseg->js_seq = jblocks->jb_nextseq++; + if (TAILQ_EMPTY(&jblocks->jb_segs)) + jblocks->jb_oldestseq = jseg->js_seq; + TAILQ_INSERT_TAIL(&jblocks->jb_segs, jseg, js_next); + if (jblocks->jb_writeseg == NULL) + jblocks->jb_writeseg = jseg; + /* + * Start filling in records from the pending list. + */ + data = bp->b_data; + jseg_write(fs, jblocks, jseg, data); + data += JREC_SIZE; + while ((wk = LIST_FIRST(&ump->softdep_journal_pending)) + != NULL) { + remove_from_journal(wk); + wk->wk_state |= IOSTARTED; + WORKLIST_INSERT(&jseg->js_entries, wk); + switch (wk->wk_type) { + case D_JADDREF: + jaddref_write(WK_JADDREF(wk), data); + break; + case D_JREMREF: + jremref_write(WK_JREMREF(wk), data); + break; + case D_JMVREF: + jmvref_write(WK_JMVREF(wk), data); + break; + case D_JNEWBLK: + jnewblk_write(WK_JNEWBLK(wk), data); + break; + case D_JFREEBLK: + jfreeblk_write(WK_JFREEBLK(wk), data); + break; + case D_JFREEFRAG: + jfreefrag_write(WK_JFREEFRAG(wk), data); + break; + default: + panic("process_journal: Unknown type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + data += JREC_SIZE; + if (--cnt == 0) + break; + } + /* + * Write this one buffer and continue. + */ +#if 1 + WORKLIST_INSERT(&bp->b_dep, &jseg->js_list); + FREE_LOCK(&lk); + BO_LOCK(bp->b_bufobj); + bgetvp(ump->um_devvp, bp); + BO_UNLOCK(bp->b_bufobj); + /* XXX Could bawrite here. */ + bwrite(bp); + ACQUIRE_LOCK(&lk); +#else + /* This case simulates the write but does not log anything. */ + handle_written_jseg(jseg, bp); + FREE_LOCK(&lk); + brelse(bp); + ACQUIRE_LOCK(&lk); +#endif + segwritten++; + } + /* + * If we've suspended the filesystem because we ran out of journal + * space either try to sync it here to make some progress or + * unsuspend it if we already have. + */ + if (flags == 0 && jblocks && jblocks->jb_suspended) { + if (journal_space(ump, jblocks->jb_min)) { + FREE_LOCK(&lk); + jblocks->jb_suspended = 0; + mp->mnt_susp_owner = curthread; + vfs_write_resume(mp); + ACQUIRE_LOCK(&lk); + return; + } + FREE_LOCK(&lk); + VFS_SYNC(mp, MNT_NOWAIT); + ffs_sbupdate(ump, MNT_WAIT, 0); + ACQUIRE_LOCK(&lk); + } +} + +/* + * Complete a jseg, allowing all dependencies awaiting journal writes + * to proceed. Each journal dependency also attaches a jsegdep to dependent + * structures so that the journal segment can be freed to reclaim space. 
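*/
/*
 * [Editor's note -- illustrative sketch, not part of the quoted patch.]
 * handle_written_jseg() below refuses to process a segment until every
 * older segment is also on disk: completions that arrive out of order are
 * parked until jb_writeseg catches up, so entries are always completed in
 * sequence order.  A small standalone model of that ordering gate (the
 * segment array and names are invented):
 */
#include <stdio.h>

#define NSEGS 4
static int done[NSEGS];	/* set by (simulated) write completion */
static int writeseg;	/* index of the oldest unfinished segment */

static void
segment_written(int i)
{
	done[i] = 1;
	if (i != writeseg)	/* out of order: defer to the oldest */
		return;
	while (writeseg < NSEGS && done[writeseg]) {
		printf("completing seg %d\n", writeseg);
		writeseg++;	/* process its entries, then advance */
	}
}

int
main(void)
{
	segment_written(1);	/* deferred: seg 0 still outstanding */
	segment_written(0);	/* completes segs 0 and 1 in order */
	return (0);
}
/* [the quoted patch resumes]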
+ */ +static void +complete_jseg(jseg) + struct jseg *jseg; +{ + struct worklist *wk; + struct jmvref *jmvref; + int waiting; + int i; + + i = 0; + while ((wk = LIST_FIRST(&jseg->js_entries)) != NULL) { + WORKLIST_REMOVE(wk); + waiting = wk->wk_state & IOWAITING; + wk->wk_state &= ~(IOSTARTED | IOWAITING); + wk->wk_state |= COMPLETE; + KASSERT(i < jseg->js_cnt, + ("handle_written_jseg: overflow %d >= %d", + i, jseg->js_cnt)); + jseg->js_refs++; /* Ref goes to the jsegdep below. */ + switch (wk->wk_type) { + case D_JADDREF: + handle_written_jaddref(WK_JADDREF(wk), jseg); + break; + case D_JREMREF: + handle_written_jremref(WK_JREMREF(wk), jseg); + break; + case D_JMVREF: + jseg->js_refs--; /* No jsegdep here. */ + jmvref = WK_JMVREF(wk); + LIST_REMOVE(jmvref, jm_deps); + free_pagedep(jmvref->jm_pagedep); + WORKITEM_FREE(jmvref, D_JMVREF); + break; + case D_JNEWBLK: + handle_written_jnewblk(WK_JNEWBLK(wk), jseg); + break; + case D_JFREEBLK: + handle_written_jfreeblk(WK_JFREEBLK(wk), jseg); + break; + case D_JFREEFRAG: + handle_written_jfreefrag(WK_JFREEFRAG(wk), jseg); + break; + default: + panic("handle_written_jseg: Unknown type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + if (waiting) + wakeup(wk); + } + /* Release the self reference so the structure may be freed. */ + free_jseg(jseg); +} + +/* + * Mark a jseg as DEPCOMPLETE and throw away the buffer. Handle jseg + * completions in order only. + */ +static void +handle_written_jseg(jseg, bp) + struct jseg *jseg; + struct buf *bp; +{ + struct jblocks *jblocks; + struct jseg *jsegn; + + if (jseg->js_refs == 0) + panic("handle_written_jseg: No self-reference on %p", jseg); + jseg->js_state |= DEPCOMPLETE; + /* + * We'll never need this buffer again, set flags so it will be + * discarded. + */ + bp->b_flags |= B_INVAL | B_NOCACHE; + jblocks = jseg->js_jblocks; + /* + * Don't allow out of order completions. If this isn't the first + * block wait for it to write before we're done. + */ + if (jseg != jblocks->jb_writeseg) + return; + /* Iterate through available jsegs processing their entries. */ + do { + jsegn = TAILQ_NEXT(jseg, js_next); + complete_jseg(jseg); + jseg = jsegn; + } while (jseg && jseg->js_state & DEPCOMPLETE); + jblocks->jb_writeseg = jseg; +} + +static inline struct jsegdep * +inoref_segattach(inoref, jseg) + struct inoref *inoref; + struct jseg *jseg; +{ + struct jsegdep *jsegdep; + + jsegdep = inoref->if_jsegdep; + inoref->if_jsegdep = NULL; + jsegdep->jd_seg = jseg; + + return (jsegdep); +} + +/* + * Called once a jremref has made it to stable store. The jremref is marked + * complete and we attempt to free it. Any pagedeps writes sleeping waiting + * for the jremref to complete will be awoken by free_jremref. + */ +static void +handle_written_jremref(jremref, jseg) + struct jremref *jremref; + struct jseg *jseg; +{ + struct inodedep *inodedep; + struct jsegdep *jsegdep; + struct dirrem *dirrem; + + /* + * Attach the jsegdep to the jseg. + */ + jsegdep = inoref_segattach(&jremref->jr_ref, jseg); + /* + * Remove us from the inoref list. + */ + if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino, + 0, &inodedep) == 0) + panic("handle_written_jremref: Lost inodedep"); + TAILQ_REMOVE(&inodedep->id_inoreflst, &jremref->jr_ref, if_deps); + /* + * Complete the dirrem. 
+ */ + dirrem = jremref->jr_dirrem; + jremref->jr_dirrem = NULL; + LIST_REMOVE(jremref, jr_deps); + jsegdep->jd_state |= jremref->jr_state & MKDIR_PARENT; + WORKLIST_INSERT(&dirrem->dm_jwork, &jsegdep->jd_list); + if (LIST_EMPTY(&dirrem->dm_jremrefhd) && + (dirrem->dm_state & COMPLETE) != 0) + add_to_worklist(&dirrem->dm_list, 0); + free_jremref(jremref); +} + +/* + * Called once a jaddref has made it to stable store. The dependency is + * marked complete and any dependent structures are added to the inode + * bufwait list to be completed as soon as it is written. If a bitmap write + * depends on this entry we move the inode into the inodedephd of the + * bmsafemap dependency and attempt to remove the jaddref from the bmsafemap. + */ +static void +handle_written_jaddref(jaddref, jseg) + struct jaddref *jaddref; + struct jseg *jseg; +{ + struct jsegdep *jsegdep; + struct inodedep *inodedep; + struct diradd *diradd; + struct mkdir *mkdir; + + /* + * Attach the jsegdep to the jseg. + */ + jsegdep = inoref_segattach(&jaddref->ja_ref, jseg); + mkdir = NULL; + diradd = NULL; + if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino, + 0, &inodedep) == 0) + panic("handle_written_jaddref: Lost inodedep."); + if (jaddref->ja_diradd == NULL) + panic("handle_written_jaddref: No dependency"); + if (jaddref->ja_diradd->da_list.wk_type == D_DIRADD) { + diradd = jaddref->ja_diradd; + WORKLIST_INSERT(&inodedep->id_bufwait, &diradd->da_list); + } else if (jaddref->ja_state & MKDIR_PARENT) { + mkdir = jaddref->ja_mkdir; + WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir->md_list); + } else if (jaddref->ja_state & MKDIR_BODY) + mkdir = jaddref->ja_mkdir; + else + panic("handle_written_jaddref: Unknown dependency %p", + jaddref->ja_diradd); + jaddref->ja_diradd = NULL; /* also clears ja_mkdir */ + /* + * Remove us from the inode list. + */ + TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, if_deps); + /* + * The mkdir may be waiting on the jaddref to clear before freeing. + */ + if (mkdir) { + KASSERT(mkdir->md_list.wk_type == D_MKDIR, + ("handle_written_jaddref: Incorrect type for mkdir %s", + TYPENAME(mkdir->md_list.wk_type))); + mkdir->md_jaddref = NULL; + diradd = mkdir->md_diradd; + mkdir->md_state |= DEPCOMPLETE; + complete_mkdir(mkdir); + } + WORKLIST_INSERT(&diradd->da_jwork, &jsegdep->jd_list); + if (jaddref->ja_state & NEWBLOCK) { + inodedep->id_state |= ONDEPLIST; + LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_inodedephd, + inodedep, id_deps); + } + free_jaddref(jaddref); +} + +/* + * Called once a jnewblk journal is written. The allocdirect or allocindir + * is placed in the bmsafemap to await notification of a written bitmap. + */ +static void +handle_written_jnewblk(jnewblk, jseg) + struct jnewblk *jnewblk; + struct jseg *jseg; +{ + struct bmsafemap *bmsafemap; + struct jsegdep *jsegdep; + struct newblk *newblk; + + /* + * Attach the jsegdep to the jseg. + */ + jsegdep = jnewblk->jn_jsegdep; + jnewblk->jn_jsegdep = NULL; + jsegdep->jd_seg = jseg; + /* + * Add the written block to the bmsafemap so it can be notified when + * the bitmap is on disk. 
+ */ + newblk = jnewblk->jn_newblk; + jnewblk->jn_newblk = NULL; + if (newblk == NULL) + panic("handle_written_jnewblk: No dependency for the segdep."); + + newblk->nb_jnewblk = NULL; + bmsafemap = newblk->nb_bmsafemap; + WORKLIST_INSERT(&newblk->nb_jwork, &jsegdep->jd_list); + newblk->nb_state |= ONDEPLIST; + LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + free_jnewblk(jnewblk); +} + +/* + * Cancel a jfreefrag that won't be needed, probably due to colliding with + * an in-flight allocation that has not yet been committed. Divorce us + * from the freefrag and mark it DEPCOMPLETE so that it may be added + * to the worklist. + */ +static void +cancel_jfreefrag(jfreefrag) + struct jfreefrag *jfreefrag; +{ + struct freefrag *freefrag; + + if (jfreefrag->fr_jsegdep) { + free_jsegdep(jfreefrag->fr_jsegdep); + jfreefrag->fr_jsegdep = NULL; + } + freefrag = jfreefrag->fr_freefrag; + jfreefrag->fr_freefrag = NULL; + freefrag->ff_jfreefrag = NULL; + free_jfreefrag(jfreefrag); + freefrag->ff_state |= DEPCOMPLETE; +} + +/* + * Free a jfreefrag when the parent freefrag is rendered obsolete. + */ +static void +free_jfreefrag(jfreefrag) + struct jfreefrag *jfreefrag; +{ + + if (jfreefrag->fr_state & IOSTARTED) + WORKLIST_REMOVE(&jfreefrag->fr_list); + else if (jfreefrag->fr_state & ONWORKLIST) + remove_from_journal(&jfreefrag->fr_list); + if (jfreefrag->fr_freefrag != NULL) + panic("free_jfreefrag: Still attached to a freefrag."); + WORKITEM_FREE(jfreefrag, D_JFREEFRAG); +} + +/* + * Called when the journal write for a jfreefrag completes. The parent + * freefrag is added to the worklist if this completes its dependencies. + */ +static void +handle_written_jfreefrag(jfreefrag, jseg) + struct jfreefrag *jfreefrag; + struct jseg *jseg; +{ + struct jsegdep *jsegdep; + struct freefrag *freefrag; + + /* + * Attach the jsegdep to the jseg. + */ + jsegdep = jfreefrag->fr_jsegdep; + jfreefrag->fr_jsegdep = NULL; + jsegdep->jd_seg = jseg; + freefrag = jfreefrag->fr_freefrag; + if (freefrag == NULL) + panic("handle_written_jfreefrag: No freefrag."); + freefrag->ff_state |= DEPCOMPLETE; + freefrag->ff_jfreefrag = NULL; + WORKLIST_INSERT(&freefrag->ff_jwork, &jsegdep->jd_list); + if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(&freefrag->ff_list, 0); + jfreefrag->fr_freefrag = NULL; + free_jfreefrag(jfreefrag); +} + +/* + * Called when the journal write for a jfreeblk completes. The jfreeblk + * is removed from the freeblks list of pending journal writes and the + * jsegdep is moved to the freeblks jwork to be completed when all blocks + * have been reclaimed. + */ +static void +handle_written_jfreeblk(jfreeblk, jseg) + struct jfreeblk *jfreeblk; + struct jseg *jseg; +{ + struct freeblks *freeblks; + struct jsegdep *jsegdep; + + /* Attach the jsegdep to the jseg. */ + jsegdep = jfreeblk->jf_jsegdep; + jfreeblk->jf_jsegdep = NULL; + jsegdep->jd_seg = jseg; + freeblks = jfreeblk->jf_freeblks; + LIST_REMOVE(jfreeblk, jf_deps); + WORKLIST_INSERT(&freeblks->fb_jwork, &jsegdep->jd_list); + /* + * If the freeblks is all journaled, we can add it to the worklist. + */ + if (LIST_EMPTY(&freeblks->fb_jfreeblkhd) && + (freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) { + /* Remove from the b_dep that is waiting on this write. 
*/ + if (freeblks->fb_state & ONWORKLIST) + WORKLIST_REMOVE(&freeblks->fb_list); + add_to_worklist(&freeblks->fb_list, 1); + } + + free_jfreeblk(jfreeblk); +} + +static struct jsegdep * +newjsegdep(struct worklist *wk) +{ + struct jsegdep *jsegdep; + + jsegdep = malloc(sizeof(*jsegdep), M_JSEGDEP, M_SOFTDEP_FLAGS); + workitem_alloc(&jsegdep->jd_list, D_JSEGDEP, wk->wk_mp); + jsegdep->jd_seg = NULL; + + return (jsegdep); +} + +static struct jmvref * +newjmvref(dp, ino, oldoff, newoff) + struct inode *dp; + ino_t ino; + off_t oldoff; + off_t newoff; +{ + struct jmvref *jmvref; + + jmvref = malloc(sizeof(*jmvref), M_JMVREF, M_SOFTDEP_FLAGS); + workitem_alloc(&jmvref->jm_list, D_JMVREF, UFSTOVFS(dp->i_ump)); + jmvref->jm_list.wk_state = ATTACHED | DEPCOMPLETE; + jmvref->jm_parent = dp->i_number; + jmvref->jm_ino = ino; + jmvref->jm_oldoff = oldoff; + jmvref->jm_newoff = newoff; + + return (jmvref); +} + +/* + * Allocate a new jremref that tracks the removal of ip from dp with the + * directory entry offset of diroff. Mark the entry as ATTACHED and + * DEPCOMPLETE as we have all the information required for the journal write + * and the directory has already been removed from the buffer. The caller + * is responsible for linking the jremref into the pagedep and adding it + * to the journal to write. The MKDIR_PARENT flag is set if we're doing + * a DOTDOT addition so handle_workitem_remove() can properly assign + * the jsegdep when we're done. + */ +static struct jremref * +newjremref(dirrem, dp, ip, diroff, nlink) + struct dirrem *dirrem; + struct inode *dp; + struct inode *ip; + off_t diroff; + nlink_t nlink; +{ + struct jremref *jremref; + + jremref = malloc(sizeof(*jremref), M_JREMREF, M_SOFTDEP_FLAGS); + workitem_alloc(&jremref->jr_list, D_JREMREF, UFSTOVFS(dp->i_ump)); + jremref->jr_state = ATTACHED; + newinoref(&jremref->jr_ref, ip->i_number, dp->i_number, diroff, + nlink, ip->i_mode); + jremref->jr_dirrem = dirrem; + + return (jremref); +} + +static inline void +newinoref(inoref, ino, parent, diroff, nlink, mode) + struct inoref *inoref; + ino_t ino; + ino_t parent; + off_t diroff; + nlink_t nlink; + uint16_t mode; +{ + + inoref->if_jsegdep = newjsegdep(&inoref->if_list); + inoref->if_diroff = diroff; + inoref->if_ino = ino; + inoref->if_parent = parent; + inoref->if_nlink = nlink; + inoref->if_mode = mode; +} + +/* + * Allocate a new jaddref to track the addition of ino to dp at diroff. The + * directory offset may not be known until later. The caller is responsible + * for adding the entry to the journal when this information is available. nlink + * should be the link count prior to the addition and mode is only required + * to have the correct FMT. + */ +static struct jaddref * +newjaddref(dp, ino, diroff, nlink, mode) + struct inode *dp; + ino_t ino; + off_t diroff; + int16_t nlink; + uint16_t mode; +{ + struct jaddref *jaddref; + + jaddref = malloc(sizeof(*jaddref), M_JADDREF, M_SOFTDEP_FLAGS); + workitem_alloc(&jaddref->ja_list, D_JADDREF, UFSTOVFS(dp->i_ump)); + jaddref->ja_state = ATTACHED; + jaddref->ja_mkdir = NULL; + newinoref(&jaddref->ja_ref, ino, dp->i_number, diroff, nlink, mode); + + return (jaddref); +} + +/* + * Create a new free dependency for a freework. The caller is responsible + * for adjusting the reference count when it has the lock held. The freedep + * will track an outstanding bitmap write that will ultimately clear the + * freework to continue. 
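*/
/*
 * [Editor's note -- illustrative sketch, not part of the quoted patch.]
 * newfreedep()/free_freedep() just below pin a freework with one reference
 * per tracked buffer write; whoever drops the last reference queues the
 * freework.  The essence of that handoff, with the worklist reduced to a
 * printf.  In the patch the caller adjusts fw_ref while holding the lock;
 * for brevity the constructor does it here, and error handling is elided.
 */
#include <stdio.h>
#include <stdlib.h>

struct freework { int ref; };
struct freedep { struct freework *fw; };

static struct freedep *
newfreedep(struct freework *fw)
{
	struct freedep *fd = malloc(sizeof(*fd));

	fd->fw = fw;
	fw->ref++;
	return (fd);
}

static void
free_freedep(struct freedep *fd)
{
	if (--fd->fw->ref == 0)
		printf("last freedep gone: queue the freework\n");
	free(fd);
}

int
main(void)
{
	struct freework fw = { 0 };
	struct freedep *a = newfreedep(&fw), *b = newfreedep(&fw);

	free_freedep(a);
	free_freedep(b);	/* triggers the freework */
	return (0);
}
/* [the quoted patch resumes]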
+ */ +static struct freedep * +newfreedep(struct freework *freework) +{ + struct freedep *freedep; + + freedep = malloc(sizeof(*freedep), M_FREEDEP, M_SOFTDEP_FLAGS); + workitem_alloc(&freedep->fd_list, D_FREEDEP, freework->fw_list.wk_mp); + freedep->fd_freework = freework; + + return (freedep); +} + +/* + * Free a freedep structure once the buffer it is linked to is written. If + * this is the last reference to the freework schedule it for completion. + */ +static void +free_freedep(freedep) + struct freedep *freedep; +{ + + if (--freedep->fd_freework->fw_ref == 0) + add_to_worklist(&freedep->fd_freework->fw_list, 1); + WORKITEM_FREE(freedep, D_FREEDEP); +} + +/* + * Allocate a new freework structure that may be a level in an indirect + * when parent is not NULL or a top level block when it is. The top level + * freework structures are allocated without lk held and before the freeblks + * is visible outside of softdep_setup_freeblocks(). + */ +static struct freework * +newfreework(freeblks, parent, lbn, nb, frags, journal) + struct freeblks *freeblks; + struct freework *parent; + ufs_lbn_t lbn; + ufs2_daddr_t nb; + int frags; + int journal; +{ + struct freework *freework; + + freework = malloc(sizeof(*freework), M_FREEWORK, M_SOFTDEP_FLAGS); + workitem_alloc(&freework->fw_list, D_FREEWORK, freeblks->fb_list.wk_mp); + freework->fw_freeblks = freeblks; + freework->fw_parent = parent; + freework->fw_lbn = lbn; + freework->fw_blkno = nb; + freework->fw_frags = frags; + freework->fw_ref = 0; + freework->fw_off = 0; + LIST_INIT(&freework->fw_jwork); + + if (parent == NULL) { + WORKLIST_INSERT_UNLOCKED(&freeblks->fb_freeworkhd, + &freework->fw_list); + freeblks->fb_ref++; + } + if (journal) + newjfreeblk(freeblks, lbn, nb, frags); + + return (freework); +} + +/* + * Allocate a new jfreeblk to journal top level block pointer when truncating + * a file. The caller must add this to the worklist when lk is held. + */ +static struct jfreeblk * +newjfreeblk(freeblks, lbn, blkno, frags) + struct freeblks *freeblks; + ufs_lbn_t lbn; + ufs2_daddr_t blkno; + int frags; +{ + struct jfreeblk *jfreeblk; + + jfreeblk = malloc(sizeof(*jfreeblk), M_JFREEBLK, M_SOFTDEP_FLAGS); + workitem_alloc(&jfreeblk->jf_list, D_JFREEBLK, freeblks->fb_list.wk_mp); + jfreeblk->jf_jsegdep = newjsegdep(&jfreeblk->jf_list); + jfreeblk->jf_state = ATTACHED | DEPCOMPLETE; + jfreeblk->jf_ino = freeblks->fb_previousinum; + jfreeblk->jf_lbn = lbn; + jfreeblk->jf_blkno = blkno; + jfreeblk->jf_frags = frags; + jfreeblk->jf_freeblks = freeblks; + LIST_INSERT_HEAD(&freeblks->fb_jfreeblkhd, jfreeblk, jf_deps); + + return (jfreeblk); +} + +static void move_newblock_dep(struct jaddref *, struct inodedep *); +/* + * If we're canceling a new bitmap we have to search for another ref + * to move into the bmsafemap dep. This might be better expressed + * with another structure. 
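*/
/*
 * [Editor's note -- illustrative sketch, not part of the quoted patch.]
 * When cancel_jaddref() below discards a link-add that was never
 * journaled, every reference operation queued after it recorded a link
 * count that assumed the add; those snapshots must be walked back so the
 * journal stays consistent with memory.  Modeling the inoreflst as a
 * simple array (all names invented):
 */
#include <stdio.h>

struct inoref { int nlink; };

static void
cancel_ref(struct inoref *refs, int idx, int nrefs, int journaled)
{
	int i;

	/* Only unjournaled cancels adjust the later nlink snapshots. */
	if (!journaled)
		for (i = idx + 1; i < nrefs; i++)
			refs[i].nlink--;
}

int
main(void)
{
	struct inoref refs[] = { {1}, {2}, {3} };	/* growing nlink */

	cancel_ref(refs, 0, 3, 0);	/* cancel the first, unjournaled */
	printf("%d %d\n", refs[1].nlink, refs[2].nlink);	/* 1 2 */
	return (0);
}
/* [the quoted patch resumes]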
+ */ +static void +move_newblock_dep(jaddref, inodedep) + struct jaddref *jaddref; + struct inodedep *inodedep; +{ + struct inoref *inoref; + struct jaddref *jaddrefn; + + jaddrefn = NULL; + for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref; + inoref = TAILQ_NEXT(inoref, if_deps)) { + if ((jaddref->ja_state & NEWBLOCK) && + inoref->if_list.wk_type == D_JADDREF) { + jaddrefn = (struct jaddref *)inoref; + break; + } + } + if (jaddrefn == NULL) + return; + if (inodedep == NULL) + if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino, + 0, &inodedep) == 0) + panic("move_newblock_dep: Lost inodedep"); + jaddrefn->ja_state &= ~(ATTACHED | UNDONE); + jaddrefn->ja_state |= jaddref->ja_state & + (ATTACHED | UNDONE | NEWBLOCK); + jaddref->ja_state &= ~(ATTACHED | UNDONE | NEWBLOCK); + jaddref->ja_state |= ATTACHED; + LIST_REMOVE(jaddref, ja_bmdeps); + LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_jaddrefhd, jaddrefn, + ja_bmdeps); +} + +/* + * Cancel a jaddref either before it has been written or while it is being + * written. This happens when a link is removed before the add reaches + * the disk. The jaddref dependency is kept linked into the bmsafemap + * and inode to prevent the link count or bitmap from reaching the disk + * until handle_workitem_remove() re-adjusts the counts and bitmaps as + * required. + * + * Returns 1 if the canceled addref requires journaling of the remove and + * 0 otherwise. + */ +static int +cancel_jaddref(jaddref, inodedep, wkhd) + struct jaddref *jaddref; + struct inodedep *inodedep; + struct workhead *wkhd; +{ + struct inoref *inoref; + int needsj; + + KASSERT((jaddref->ja_state & COMPLETE) == 0, + ("cancel_jaddref: Canceling complete jaddref")); + if (jaddref->ja_state & (IOSTARTED | COMPLETE)) + needsj = 1; + else + needsj = 0; + /* + * If we're not journaling this remove we must adjust the nlink of + * any reference operation that follows us so that it is consistent + * with the in-memory reference. + */ + if (needsj == 0) + for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref; + inoref = TAILQ_NEXT(inoref, if_deps)) + inoref->if_nlink--; + if (jaddref->ja_ref.if_jsegdep) { + free_jsegdep(jaddref->ja_ref.if_jsegdep); + jaddref->ja_ref.if_jsegdep = NULL; + } + if (jaddref->ja_state & NEWBLOCK) + move_newblock_dep(jaddref, inodedep); + if (jaddref->ja_state & IOWAITING) { + jaddref->ja_state &= ~IOWAITING; + wakeup(&jaddref->ja_list); + } + jaddref->ja_mkdir = NULL; + if (jaddref->ja_state & IOSTARTED) { + jaddref->ja_state &= ~IOSTARTED; + WORKLIST_REMOVE(&jaddref->ja_list); + } else + remove_from_journal(&jaddref->ja_list); + jaddref->ja_state |= GOINGAWAY; + /* + * Leave the head of the list for jsegdeps for fast merging. + */ + if (LIST_FIRST(wkhd) != NULL) { + jaddref->ja_state |= ONWORKLIST; + LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jaddref->ja_list, wk_list); + } else + WORKLIST_INSERT(wkhd, &jaddref->ja_list); + + return (needsj); +} + +/* + * Attempt to free a jaddref structure when some work completes. This + * should only succeed once the entry is written and all dependencies have + * been notified. 
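*/
/*
 * [Editor's note -- illustrative sketch, not part of the quoted patch.]
 * free_jaddref() below is called from several completion paths but only
 * acts once ALLCOMPLETE is reached, i.e. once every prerequisite bit is
 * set.  The same gating pattern appears throughout the patch (a block
 * allocation, for instance, needs both its journal record and the
 * cylinder-group bitmap on disk).  The pattern in miniature, with two
 * invented prerequisite bits standing in for the real state flags:
 */
#include <stdio.h>

#define JOURNAL_WRITTEN	0x01
#define BITMAP_WRITTEN	0x02
#define ALLCOMPLETE	(JOURNAL_WRITTEN | BITMAP_WRITTEN)

struct dep { int state; };

static void
dep_complete(struct dep *d, int bit)
{
	d->state |= bit;
	if ((d->state & ALLCOMPLETE) == ALLCOMPLETE)
		printf("dependency satisfied; work may proceed\n");
}

int
main(void)
{
	struct dep d = { 0 };

	dep_complete(&d, JOURNAL_WRITTEN);	/* journal record written... */
	dep_complete(&d, BITMAP_WRITTEN);	/* ...then the bitmap */
	return (0);
}
/* [the quoted patch resumes]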
+ */ +static void +free_jaddref(jaddref) + struct jaddref *jaddref; +{ + + if ((jaddref->ja_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + if (jaddref->ja_ref.if_jsegdep) + panic("free_jaddref: segdep attached to jaddref %p(0x%X)\n", + jaddref, jaddref->ja_state); + if (jaddref->ja_state & NEWBLOCK) + LIST_REMOVE(jaddref, ja_bmdeps); + if (jaddref->ja_state & (IOSTARTED | ONWORKLIST)) + panic("free_jaddref: Bad state %p(0x%X)", + jaddref, jaddref->ja_state); + if (jaddref->ja_mkdir != NULL) + panic("free_jaddref: Work pending, 0x%X\n", jaddref->ja_state); + WORKITEM_FREE(jaddref, D_JADDREF); +} + +/* + * Free a jremref structure once it has been written or discarded. + */ +static void +free_jremref(jremref) + struct jremref *jremref; +{ + + if (jremref->jr_ref.if_jsegdep) + free_jsegdep(jremref->jr_ref.if_jsegdep); + if (jremref->jr_state & IOSTARTED) + panic("free_jremref: IO still pending"); + WORKITEM_FREE(jremref, D_JREMREF); +} + +/* + * Free a jnewblk structure. + */ +static void +free_jnewblk(jnewblk) + struct jnewblk *jnewblk; +{ + + if ((jnewblk->jn_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + LIST_REMOVE(jnewblk, jn_deps); + if (jnewblk->jn_newblk != NULL) + panic("free_jnewblk: Dependency still attached."); + WORKITEM_FREE(jnewblk, D_JNEWBLK); +} + +/* + * Cancel a jnewblk which has been superseded by a freeblk. The jnewblk + * is kept linked into the bmsafemap until the free completes, thus + * preventing the modified state from ever reaching disk. The free + * routine must pass this structure via ffs_blkfree() to + * softdep_setup_freeblks() so there is no race in releasing the space. + */ +static void +cancel_jnewblk(jnewblk, wkhd) + struct jnewblk *jnewblk; + struct workhead *wkhd; +{ + + if (jnewblk->jn_jsegdep) { + free_jsegdep(jnewblk->jn_jsegdep); + jnewblk->jn_jsegdep = NULL; + } + if (jnewblk->jn_state & IOWAITING) { + jnewblk->jn_state &= ~IOWAITING; + wakeup(&jnewblk->jn_list); + } + jnewblk->jn_newblk = NULL; + jnewblk->jn_state |= GOINGAWAY; + if (jnewblk->jn_state & IOSTARTED) { + jnewblk->jn_state &= ~IOSTARTED; + WORKLIST_REMOVE(&jnewblk->jn_list); + } else + remove_from_journal(&jnewblk->jn_list); + /* + * Leave the head of the list for jsegdeps for fast merging. + */ + if (LIST_FIRST(wkhd) != NULL) { + jnewblk->jn_state |= ONWORKLIST; + LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jnewblk->jn_list, wk_list); + } else + WORKLIST_INSERT(wkhd, &jnewblk->jn_list); +} + +static void +free_jfreeblk(jfreeblk) + struct jfreeblk *jfreeblk; +{ + + WORKITEM_FREE(jfreeblk, D_JFREEBLK); +} + +/* + * Release one reference to a jseg and free it if the count reaches 0. This + * should eventually reclaim journal space as well. + */ +static void +free_jseg(jseg) + struct jseg *jseg; +{ + struct jblocks *jblocks; + + KASSERT(jseg->js_refs > 0, + ("free_jseg: Invalid refcnt %d", jseg->js_refs)); + if (--jseg->js_refs != 0) + return; + /* + * Free only those jsegs which have none allocated before them to + * preserve the journal space ordering. + */ + jblocks = jseg->js_jblocks; + while ((jseg = TAILQ_FIRST(&jblocks->jb_segs)) != NULL) { + jblocks->jb_oldestseq = jseg->js_seq; + if (jseg->js_refs != 0) + break; + TAILQ_REMOVE(&jblocks->jb_segs, jseg, js_next); + jblocks_free(jblocks, jseg->js_list.wk_mp, jseg->js_size); + KASSERT(LIST_EMPTY(&jseg->js_entries), + ("free_jseg: Freed jseg has valid entries.")); + WORKITEM_FREE(jseg, D_JSEG); + } +} + +/* + * Release a jsegdep and decrement the jseg count. 
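*/
/*
 * [Editor's note -- illustrative sketch, not part of the quoted patch.]
 * free_jseg() above only reclaims segments from the head of the queue, so
 * journal space is released strictly in allocation order and jb_oldestseq
 * never moves backwards.  A standalone model of that reclaim loop, with
 * the segment queue reduced to an array:
 */
#include <stdio.h>

struct seg { int seq; int refs; };

int
main(void)
{
	struct seg segs[] = { {10, 0}, {11, 2}, {12, 0} };
	int head = 0, nsegs = 3, oldestseq = 10;

	/* Free only the prefix with no remaining references. */
	while (head < nsegs) {
		oldestseq = segs[head].seq;
		if (segs[head].refs != 0)
			break;
		printf("reclaim seg %d\n", segs[head].seq);
		head++;
	}
	/* Seq 12 stays allocated although unreferenced: ordering. */
	printf("oldest active seq: %d\n", oldestseq);	/* 11 */
	return (0);
}
/* [the quoted patch resumes]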
+ */ +static void +free_jsegdep(jsegdep) + struct jsegdep *jsegdep; +{ + + if (jsegdep->jd_seg) + free_jseg(jsegdep->jd_seg); + WORKITEM_FREE(jsegdep, D_JSEGDEP); +} + +/* + * Wait for a journal item to make it to disk. Initiate journal processing + * if required. + */ +static void +jwait(wk) + struct worklist *wk; +{ + + /* + * If IO has not started we process the journal. We can't mark the + * worklist item as IOWAITING because we drop the lock while + * processing the journal and the worklist entry may be freed after + * this point. The caller may call back in and re-issue the request. + */ + if ((wk->wk_state & IOSTARTED) == 0) { + softdep_process_journal(wk->wk_mp, MNT_WAIT); + return; + } + wk->wk_state |= IOWAITING; + msleep(wk, &lk, PRIBIO, "jwait", 0); +} + +/* + * Lookup an inodedep based on an inode pointer and set the nlinkdelta as + * appropriate. This is a convenience function to reduce duplicate code + * for the setup and revert functions below. + */ +static struct inodedep * +inodedep_lookup_ip(ip) + struct inode *ip; +{ + struct inodedep *inodedep; + + KASSERT(ip->i_nlink >= ip->i_effnlink, + ("inodedep_lookup_ip: bad delta")); + (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, + DEPALLOC, &inodedep); + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; + + return (inodedep); +} + +/* + * Called prior to creating a new inode and linking it to a directory. The + * jaddref structure must already be allocated by softdep_setup_inomapdep + * and it is discovered here so we can initialize the mode and update + * nlinkdelta. + */ +void +softdep_setup_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + KASSERT(ip->i_nlink == 1, + ("softdep_setup_create: Invalid link count.")); + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, + ("softdep_setup_create: No addref structure present.")); + jaddref->ja_mode = ip->i_mode; + softdep_prelink(dvp, NULL); + } + FREE_LOCK(&lk); +} + +/* + * Create a jaddref structure to track the addition of a DOTDOT link when + * we are reparenting an inode as part of a rename. This jaddref will be + * found by softdep_setup_directory_change. Adjusts nlinkdelta for + * non-journaling softdep. + */ +void +softdep_setup_dotdot_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + jaddref = NULL; + /* + * We don't set MKDIR_PARENT as this is not tied to a mkdir and + * is used as a normal link would be. + */ + if (DOINGSUJ(dvp)) + jaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET, + dp->i_effnlink - 1, dp->i_mode); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(dp); + if (jaddref) { + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref, + if_deps); + softdep_prelink(dvp, ITOV(ip)); + } + FREE_LOCK(&lk); +} + +/* + * Create a jaddref structure to track a new link to an inode. The directory + * offset is not known until softdep_setup_directory_add or + * softdep_setup_directory_change. Adjusts nlinkdelta for non-journaling + * softdep. 
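*/
/*
 * [Editor's note -- worked example, not part of the quoted patch.]
 * inodedep_lookup_ip() above captures nlinkdelta = i_nlink - i_effnlink,
 * the number of link removals that are effective in memory but not yet
 * committed on disk.  A one-line illustration of the invariant; the
 * assert mirrors the KASSERT in the patch.
 */
#include <assert.h>
#include <stdio.h>

int
main(void)
{
	int i_nlink = 3;	/* links the on-disk inode still claims */
	int i_effnlink = 2;	/* links that remain after pending removes */

	assert(i_nlink >= i_effnlink);
	printf("nlinkdelta = %d\n", i_nlink - i_effnlink);	/* 1 */
	return (0);
}
/* [the quoted patch resumes]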
+ */ +void +softdep_setup_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + jaddref = NULL; + if (DOINGSUJ(dvp)) + jaddref = newjaddref(dp, ip->i_number, 0, ip->i_effnlink - 1, + ip->i_mode); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + if (jaddref) { + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref, + if_deps); + softdep_prelink(dvp, ITOV(ip)); + } + FREE_LOCK(&lk); +} + +/* + * Called to create the jaddref structures to track . and .. references as + * well as lookup and further initialize the incomplete jaddref created + * by softdep_setup_inomapdep when the inode was allocated. Adjusts + * nlinkdelta for non-journaling softdep. + */ +void +softdep_setup_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *dotdotaddref; + struct jaddref *dotaddref; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + dotaddref = dotdotaddref = NULL; + if (DOINGSUJ(dvp)) { + dotaddref = newjaddref(ip, ip->i_number, DOT_OFFSET, 1, + ip->i_mode); + dotaddref->ja_state |= MKDIR_BODY; + dotdotaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET, + dp->i_effnlink - 1, dp->i_mode); + dotdotaddref->ja_state |= MKDIR_PARENT; + } + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL, + ("softdep_setup_mkdir: No addref structure present.")); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_setup_mkdir: bad parent %d", + jaddref->ja_parent)); + jaddref->ja_mode = ip->i_mode; + TAILQ_INSERT_BEFORE(&jaddref->ja_ref, &dotaddref->ja_ref, + if_deps); + } + inodedep = inodedep_lookup_ip(dp); + if (DOINGSUJ(dvp)) { + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, + &dotdotaddref->ja_ref, if_deps); + softdep_prelink(ITOV(dp), NULL); + } + FREE_LOCK(&lk); +} + +/* + * Called to track nlinkdelta of the inode and parent directories prior to + * unlinking a directory. + */ +void +softdep_setup_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct vnode *dvp; + + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + if (DOINGSUJ(dvp)) + softdep_prelink(dvp, ITOV(ip)); + FREE_LOCK(&lk); +} + +/* + * Called to track nlinkdelta of the inode and parent directories prior to + * unlink. + */ +void +softdep_setup_unlink(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct vnode *dvp; + + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + if (DOINGSUJ(dvp)) + softdep_prelink(dvp, ITOV(ip)); + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed non-directory + * creation. Adjusts nlinkdelta for non-journaling softdep. + */ +void +softdep_revert_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_create: addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed dotdot link + * creation. 
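*/
/*
 * [Editor's note -- illustrative sketch, not part of the quoted patch.]
 * The softdep_revert_* routines above and below share one pattern: the
 * reference created most recently by the matching setup routine sits at
 * the tail of id_inoreflst (TAILQ_LAST), so a failed operation cancels
 * exactly that ref after sanity-checking its parent.  A toy version with
 * the list as an array (names invented):
 */
#include <assert.h>
#include <stdio.h>

struct ref { int parent; };

int
main(void)
{
	struct ref refs[8];
	int nrefs = 0, dp_ino = 5;

	refs[nrefs++] = (struct ref){ dp_ino };	/* setup: newest at tail */
	/* ...the link fails; revert cancels the newest ref... */
	assert(refs[nrefs - 1].parent == dp_ino);	/* as the KASSERTs */
	nrefs--;
	printf("%d refs left\n", nrefs);
	return (0);
}
/* [the quoted patch resumes]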
Adjusts nlinkdelta for non-journaling softdep. + */ +void +softdep_revert_dotdot_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(dp); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == ip->i_number, + ("softdep_revert_dotdot_link: addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed link + * addition. Adjusts nlinkdelta for non-journaling softdep. + */ +void +softdep_revert_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_link: addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed mkdir + * attempt. Adjusts nlinkdelta for non-journaling softdep. + */ +void +softdep_revert_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(dp); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == ip->i_number, + ("softdep_revert_mkdir: dotdot addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_mkdir: addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == ip->i_number, + ("softdep_revert_mkdir: dot addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + FREE_LOCK(&lk); +} + +/* + * Called to correct nlinkdelta after a failed rmdir. + */ +void +softdep_revert_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + ACQUIRE_LOCK(&lk); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + FREE_LOCK(&lk); +} + +/* * Protecting the freemaps (or bitmaps). * * To eliminate the need to execute fsck before mounting a filesystem @@ -1536,22 +3788,44 @@ softdep_setup_inomapdep(bp, ip, newinum) { struct inodedep *inodedep; struct bmsafemap *bmsafemap; + struct jaddref *jaddref; + struct mount *mp; + struct fs *fs; + mp = UFSTOVFS(ip->i_ump); + fs = ip->i_ump->um_fs; + jaddref = NULL; + /* + * Allocate the journal reference add structure so that the bitmap + * can be dependent on it. + */ + if (mp->mnt_flag & MNT_SUJ) { + jaddref = newjaddref(ip, newinum, 0, 0, 0); + jaddref->ja_state |= NEWBLOCK; + } + + /* * Create a dependency for the newly allocated inode. * Panic if it already exists as something is seriously wrong. 
 * Otherwise add it to the dependency list for the buffer holding
 * the cylinder group map from which it was allocated.
 */
	ACQUIRE_LOCK(&lk);
-	if ((inodedep_lookup(UFSTOVFS(ip->i_ump), newinum, DEPALLOC|NODELAY,
-	    &inodedep)))
-		panic("softdep_setup_inomapdep: dependency for new inode "
-		    "already exists");
-	inodedep->id_buf = bp;
+	if ((inodedep_lookup(mp, newinum, DEPALLOC|NODELAY, &inodedep)))
+		panic("softdep_setup_inomapdep: dependency %p for new "
+		    "inode already exists", inodedep);
+	bmsafemap = bmsafemap_lookup(mp, bp, ino_to_cg(fs, newinum));
+	if (jaddref) {
+		LIST_INSERT_HEAD(&bmsafemap->sm_jaddrefhd, jaddref, ja_bmdeps);
+		TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
+		    if_deps);
+	} else {
+		inodedep->id_state |= ONDEPLIST;
+		LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps);
+	}
+	inodedep->id_bmsafemap = bmsafemap;
	inodedep->id_state &= ~DEPCOMPLETE;
-	bmsafemap = bmsafemap_lookup(inodedep->id_list.wk_mp, bp);
-	LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps);
	FREE_LOCK(&lk);
}

@@ -1560,29 +3834,98 @@ softdep_setup_inomapdep(bp, ip, newinum)
 * allocate block or fragment.
 */
void
-softdep_setup_blkmapdep(bp, mp, newblkno)
+softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags)
	struct buf *bp;		/* buffer for cylgroup block with block map */
	struct mount *mp;	/* filesystem doing allocation */
	ufs2_daddr_t newblkno;	/* number of newly allocated block */
+	int frags;		/* Number of fragments. */
+	int oldfrags;		/* Previous number of fragments for extend. */
{
	struct newblk *newblk;
	struct bmsafemap *bmsafemap;
+	struct jnewblk *jnewblk;
	struct fs *fs;

	fs = VFSTOUFS(mp)->um_fs;
+	jnewblk = NULL;
	/*
	 * Create a dependency for the newly allocated block.
	 * Add it to the dependency list for the buffer holding
	 * the cylinder group map from which it was allocated.
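	 *
	 * (Sketch, our paraphrase of the code below.)  A new block either
	 * rides on its journal record or is tracked directly by the cg
	 * buffer's bmsafemap until the bitmap write completes:
	 *
	 *	if (jnewblk != NULL)
	 *		LIST_INSERT_HEAD(&bmsafemap->sm_jnewblkhd, jnewblk,
	 *		    jn_deps);
	 *	else {
	 *		newblk->nb_state |= ONDEPLIST;
	 *		LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk,
	 *		    nb_deps);
	 *	}
	 *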
*/ + if (mp->mnt_flag & MNT_SUJ) { + jnewblk = malloc(sizeof(*jnewblk), M_JNEWBLK, M_SOFTDEP_FLAGS); + workitem_alloc(&jnewblk->jn_list, D_JNEWBLK, mp); + jnewblk->jn_jsegdep = newjsegdep(&jnewblk->jn_list); + jnewblk->jn_state = ATTACHED; + jnewblk->jn_blkno = newblkno; + jnewblk->jn_frags = frags; + jnewblk->jn_oldfrags = oldfrags; +#ifdef SUJ_DEBUG + { + struct cg *cgp; + uint8_t *blksfree; + long bno; + int i; + + cgp = (struct cg *)bp->b_data; + blksfree = cg_blksfree(cgp); + bno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; + i++) { + if (isset(blksfree, bno + i)) + panic("softdep_setup_blkmapdep: " + "free fragment %d from %d-%d " + "state 0x%X dep %p", i, + jnewblk->jn_oldfrags, + jnewblk->jn_frags, + jnewblk->jn_state, + jnewblk->jn_newblk); + } + } +#endif + } ACQUIRE_LOCK(&lk); - if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0) + if (newblk_lookup(mp, newblkno, DEPALLOC, &newblk) != 0) panic("softdep_setup_blkmapdep: found block"); - newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp); - LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp, + dtog(fs, newblkno)); + if (jnewblk) { + jnewblk->jn_newblk = newblk; + LIST_INSERT_HEAD(&bmsafemap->sm_jnewblkhd, jnewblk, jn_deps); + } else { + newblk->nb_state |= ONDEPLIST; + LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + } + newblk->nb_bmsafemap = bmsafemap; + newblk->nb_jnewblk = jnewblk; FREE_LOCK(&lk); } +#define BMSAFEMAP_HASH(fs, cg) \ + (&bmsafemap_hashtbl[((((register_t)(fs)) >> 13) + (cg)) & bmsafemap_hash]) + +static int +bmsafemap_find(bmsafemaphd, mp, cg, bmsafemapp) + struct bmsafemap_hashhead *bmsafemaphd; + struct mount *mp; + int cg; + struct bmsafemap **bmsafemapp; +{ + struct bmsafemap *bmsafemap; + + LIST_FOREACH(bmsafemap, bmsafemaphd, sm_hash) + if (bmsafemap->sm_list.wk_mp == mp && bmsafemap->sm_cg == cg) + break; + if (bmsafemap) { + *bmsafemapp = bmsafemap; + return (1); + } + *bmsafemapp = NULL; + + return (0); +} + /* * Find the bmsafemap associated with a cylinder group buffer. * If none exists, create one. The buffer must be locked when @@ -1590,27 +3933,43 @@ void * splbio interrupts blocked. 
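 *
 * (Aside, ours.)  The lookup below uses the familiar unlock,
 * allocate, relock, and recheck idiom, discarding the fresh
 * allocation when another thread won the race:
 *
 *	if (bmsafemap_find(hd, mp, cg, &bmsafemap) == 1)
 *		return (bmsafemap);
 *	FREE_LOCK(&lk);			(allocation may sleep)
 *	bmsafemap = malloc(sizeof(*bmsafemap), ...);
 *	ACQUIRE_LOCK(&lk);
 *	if (bmsafemap_find(hd, mp, cg, &collision) == 1) {
 *		WORKITEM_FREE(bmsafemap, D_BMSAFEMAP);
 *		return (collision);
 *	}
 *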
*/ static struct bmsafemap * -bmsafemap_lookup(mp, bp) +bmsafemap_lookup(mp, bp, cg) struct mount *mp; struct buf *bp; + int cg; { - struct bmsafemap *bmsafemap; + struct bmsafemap_hashhead *bmsafemaphd; + struct bmsafemap *bmsafemap, *collision; struct worklist *wk; + struct fs *fs; mtx_assert(&lk, MA_OWNED); - LIST_FOREACH(wk, &bp->b_dep, wk_list) - if (wk->wk_type == D_BMSAFEMAP) - return (WK_BMSAFEMAP(wk)); + if (bp) + LIST_FOREACH(wk, &bp->b_dep, wk_list) + if (wk->wk_type == D_BMSAFEMAP) + return (WK_BMSAFEMAP(wk)); + fs = VFSTOUFS(mp)->um_fs; + bmsafemaphd = BMSAFEMAP_HASH(fs, cg); + if (bmsafemap_find(bmsafemaphd, mp, cg, &bmsafemap) == 1) + return (bmsafemap); FREE_LOCK(&lk); bmsafemap = malloc(sizeof(struct bmsafemap), M_BMSAFEMAP, M_SOFTDEP_FLAGS); workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp); bmsafemap->sm_buf = bp; - LIST_INIT(&bmsafemap->sm_allocdirecthd); - LIST_INIT(&bmsafemap->sm_allocindirhd); LIST_INIT(&bmsafemap->sm_inodedephd); + LIST_INIT(&bmsafemap->sm_inodedepwr); LIST_INIT(&bmsafemap->sm_newblkhd); + LIST_INIT(&bmsafemap->sm_newblkwr); + LIST_INIT(&bmsafemap->sm_jaddrefhd); + LIST_INIT(&bmsafemap->sm_jnewblkhd); ACQUIRE_LOCK(&lk); + if (bmsafemap_find(bmsafemaphd, mp, cg, &collision) == 1) { + WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + return (collision); + } + bmsafemap->sm_cg = cg; + LIST_INSERT_HEAD(bmsafemaphd, bmsafemap, sm_hash); WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); return (bmsafemap); } @@ -1645,9 +4004,9 @@ static struct bmsafemap * * unreferenced fragments. */ void -softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) +softdep_setup_allocdirect(ip, off, newblkno, oldblkno, newsize, oldsize, bp) struct inode *ip; /* inode to which block is being added */ - ufs_lbn_t lbn; /* block pointer within inode */ + ufs_lbn_t off; /* block pointer within inode */ ufs2_daddr_t newblkno; /* disk block number being added */ ufs2_daddr_t oldblkno; /* previous block number, 0 unless frag */ long newsize; /* size of new block */ @@ -1656,34 +4015,33 @@ void { struct allocdirect *adp, *oldadp; struct allocdirectlst *adphead; - struct bmsafemap *bmsafemap; + struct freefrag *freefrag; struct inodedep *inodedep; struct pagedep *pagedep; + struct jnewblk *jnewblk; struct newblk *newblk; struct mount *mp; + ufs_lbn_t lbn; + lbn = bp->b_lblkno; mp = UFSTOVFS(ip->i_ump); - adp = malloc(sizeof(struct allocdirect), - M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO); - workitem_alloc(&adp->ad_list, D_ALLOCDIRECT, mp); - adp->ad_lbn = lbn; - adp->ad_newblkno = newblkno; - adp->ad_oldblkno = oldblkno; - adp->ad_newsize = newsize; - adp->ad_oldsize = oldsize; - adp->ad_state = ATTACHED; - LIST_INIT(&adp->ad_newdirblk); - if (newblkno == oldblkno) - adp->ad_freefrag = NULL; + if (oldblkno && oldblkno != newblkno) + freefrag = newfreefrag(ip, oldblkno, oldsize, lbn); else - adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize); + freefrag = NULL; ACQUIRE_LOCK(&lk); - if (lbn >= NDADDR) { + if (off >= NDADDR) { + if (lbn > 0) + panic("softdep_setup_allocdirect: bad lbn %jd, off %jd", + lbn, off); /* allocating an indirect block */ if (oldblkno != 0) panic("softdep_setup_allocdirect: non-zero indir"); } else { + if (off != lbn) + panic("softdep_setup_allocdirect: lbn %jd != off %jd", + lbn, off); /* * Allocating a direct block. * @@ -1692,26 +4050,39 @@ void * deletions. 
*/ if ((ip->i_mode & IFMT) == IFDIR && - pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) + pagedep_lookup(mp, ip->i_number, off, DEPALLOC, + &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); } - if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0) + if (newblk_lookup(mp, newblkno, 0, &newblk) == 0) panic("softdep_setup_allocdirect: lost block"); - if (newblk->nb_state == DEPCOMPLETE) { - adp->ad_state |= DEPCOMPLETE; - adp->ad_buf = NULL; - } else { - bmsafemap = newblk->nb_bmsafemap; - adp->ad_buf = bmsafemap->sm_buf; - LIST_REMOVE(newblk, nb_deps); - LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps); + KASSERT(newblk->nb_list.wk_type == D_NEWBLK, + ("softdep_setup_allocdirect: newblk already initialized")); + /* + * Convert the newblk to an allocdirect. + */ + newblk->nb_list.wk_type = D_ALLOCDIRECT; + adp = (struct allocdirect *)newblk; + newblk->nb_freefrag = freefrag; + adp->ad_offset = off; + adp->ad_oldblkno = oldblkno; + adp->ad_newsize = newsize; + adp->ad_oldsize = oldsize; + + /* + * Finish initializing the journal. + */ + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = lbn; + add_to_journal(&jnewblk->jn_list); } - LIST_REMOVE(newblk, nb_hash); - free(newblk, M_NEWBLK); - + if (freefrag && freefrag->ff_jfreefrag != NULL) + add_to_journal(&freefrag->ff_jfreefrag->fr_list); inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep); adp->ad_inodedep = inodedep; - WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); + + WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list); /* * The list of allocdirects must be kept in sorted and ascending * order so that the rollback routines can quickly determine the @@ -1726,24 +4097,25 @@ void */ adphead = &inodedep->id_newinoupdt; oldadp = TAILQ_LAST(adphead, allocdirectlst); - if (oldadp == NULL || oldadp->ad_lbn <= lbn) { + if (oldadp == NULL || oldadp->ad_offset <= off) { /* insert at end of list */ TAILQ_INSERT_TAIL(adphead, adp, ad_next); - if (oldadp != NULL && oldadp->ad_lbn == lbn) + if (oldadp != NULL && oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); return; } TAILQ_FOREACH(oldadp, adphead, ad_next) { - if (oldadp->ad_lbn >= lbn) + if (oldadp->ad_offset >= off) break; } if (oldadp == NULL) panic("softdep_setup_allocdirect: lost entry"); /* insert in middle of list */ TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); - if (oldadp->ad_lbn == lbn) + if (oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); + FREE_LOCK(&lk); } @@ -1761,10 +4133,11 @@ allocdirect_merge(adphead, newadp, oldadp) struct freefrag *freefrag; struct newdirblk *newdirblk; + freefrag = NULL; mtx_assert(&lk, MA_OWNED); if (newadp->ad_oldblkno != oldadp->ad_newblkno || newadp->ad_oldsize != oldadp->ad_newsize || - newadp->ad_lbn >= NDADDR) + newadp->ad_offset >= NDADDR) panic("%s %jd != new %jd || old size %ld != new %ld", "allocdirect_merge: old blkno", (intmax_t)newadp->ad_oldblkno, @@ -1779,7 +4152,7 @@ allocdirect_merge(adphead, newadp, oldadp) * This action is done by swapping the freefrag dependencies. * The new dependency gains the old one's freefrag, and the * old one gets the new one and then immediately puts it on - * the worklist when it is freed by free_allocdirect. It is + * the worklist when it is freed by free_newblk. It is * not possible to do this swap when the old dependency had a * non-zero size but no previous fragment to free. This condition * arises when the new block is an extension of the old block. 
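 *
 * (Our shorthand for the swap performed just below.)  The obligation
 * to free the old fragment is handed to whichever dependency will
 * complete last:
 *
 *	freefrag = newadp->ad_freefrag;
 *	if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) {
 *		newadp->ad_freefrag = oldadp->ad_freefrag;
 *		oldadp->ad_freefrag = freefrag;
 *	}
 *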
@@ -1788,8 +4161,8 @@ allocdirect_merge(adphead, newadp, oldadp) * the old dependency, so cannot legitimately be freed until the * conditions for the new dependency are fulfilled. */ + freefrag = newadp->ad_freefrag; if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) { - freefrag = newadp->ad_freefrag; newadp->ad_freefrag = oldadp->ad_freefrag; oldadp->ad_freefrag = freefrag; } @@ -1804,32 +4177,118 @@ allocdirect_merge(adphead, newadp, oldadp) panic("allocdirect_merge: extra newdirblk"); WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list); } - free_allocdirect(adphead, oldadp, 0); + TAILQ_REMOVE(adphead, oldadp, ad_next); + /* + * We need to move any journal dependencies over to the freefrag + * that releases this block if it exists. Otherwise we are + * extending an existing block and we'll wait until that is + * complete to release the journal space and extend the + * new journal to cover this old space as well. + */ + if (freefrag == NULL) { + struct jnewblk *jnewblk; + struct jnewblk *njnewblk; + + if (oldadp->ad_newblkno != newadp->ad_newblkno) + panic("allocdirect_merge: %jd != %jd", + oldadp->ad_newblkno, newadp->ad_newblkno); + jnewblk = oldadp->ad_block.nb_jnewblk; + cancel_newblk(&oldadp->ad_block, &newadp->ad_block.nb_jwork); + /* + * We have an unwritten jnewblk, we need to merge the + * frag bits with our own. The newer adp's journal can not + * be written prior to the old one so no need to check for + * it here. + */ + if (jnewblk) { + njnewblk = newadp->ad_block.nb_jnewblk; + if (njnewblk == NULL) + panic("allocdirect_merge: No jnewblk"); + if (jnewblk->jn_state & UNDONE) { + njnewblk->jn_state |= UNDONE | NEWBLOCK; + njnewblk->jn_state &= ~ATTACHED; + jnewblk->jn_state &= ~UNDONE; + } + njnewblk->jn_oldfrags = jnewblk->jn_oldfrags; + WORKLIST_REMOVE(&jnewblk->jn_list); + jnewblk->jn_state |= ATTACHED | COMPLETE; + free_jnewblk(jnewblk); + } + } else { + /* + * We can skip journaling for this freefrag and just complete + * any pending journal work for the allocdirect that is being + * removed after the freefrag completes. + */ + if (freefrag->ff_jfreefrag) + cancel_jfreefrag(freefrag->ff_jfreefrag); + cancel_newblk(&oldadp->ad_block, &freefrag->ff_jwork); + } + free_newblk(&oldadp->ad_block); } - + /* - * Allocate a new freefrag structure if needed. + * Allocate a jfreefrag structure to journal a single block free. */ +static struct jfreefrag * +newjfreefrag(freefrag, ip, blkno, size, lbn) + struct freefrag *freefrag; + struct inode *ip; + ufs2_daddr_t blkno; + long size; + ufs_lbn_t lbn; +{ + struct jfreefrag *jfreefrag; + struct fs *fs; + + fs = ip->i_fs; + jfreefrag = malloc(sizeof(struct jfreefrag), M_JFREEFRAG, + M_SOFTDEP_FLAGS); + workitem_alloc(&jfreefrag->fr_list, D_JFREEFRAG, UFSTOVFS(ip->i_ump)); + jfreefrag->fr_jsegdep = newjsegdep(&jfreefrag->fr_list); + jfreefrag->fr_state = ATTACHED | DEPCOMPLETE; + jfreefrag->fr_ino = ip->i_number; + jfreefrag->fr_lbn = lbn; + jfreefrag->fr_blkno = blkno; + jfreefrag->fr_frags = numfrags(fs, size); + jfreefrag->fr_freefrag = freefrag; + + return (jfreefrag); +} + +/* + * Allocate a new freefrag structure. 
+ */
static struct freefrag *
-newfreefrag(ip, blkno, size)
+newfreefrag(ip, blkno, size, lbn)
	struct inode *ip;
	ufs2_daddr_t blkno;
	long size;
+	ufs_lbn_t lbn;
{
	struct freefrag *freefrag;
	struct fs *fs;

-	if (blkno == 0)
-		return (NULL);
	fs = ip->i_fs;
	if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
		panic("newfreefrag: frag size");
	freefrag = malloc(sizeof(struct freefrag),
-	    M_FREEFRAG, M_SOFTDEP_FLAGS);
+	    M_FREEFRAG, M_SOFTDEP_FLAGS);
	workitem_alloc(&freefrag->ff_list, D_FREEFRAG, UFSTOVFS(ip->i_ump));
+	freefrag->ff_state = ATTACHED;
+	LIST_INIT(&freefrag->ff_jwork);
	freefrag->ff_inum = ip->i_number;
	freefrag->ff_blkno = blkno;
	freefrag->ff_fragsize = size;
+
+	if (fs->fs_flags & FS_SUJ) {
+		freefrag->ff_jfreefrag =
+		    newjfreefrag(freefrag, ip, blkno, size, lbn);
+	} else {
+		freefrag->ff_state |= DEPCOMPLETE;
+		freefrag->ff_jfreefrag = NULL;
+	}
+
	return (freefrag);
}

@@ -1842,9 +4301,17 @@ handle_workitem_freefrag(freefrag)
	struct freefrag *freefrag;
{
	struct ufsmount *ump = VFSTOUFS(freefrag->ff_list.wk_mp);
+	struct workhead wkhd;

+	/*
+	 * It would be illegal to add new completion items to the
+	 * freefrag after it was scheduled to be done, so it must be
+	 * safe to modify the list head here.
+	 */
+	LIST_INIT(&wkhd);
+	LIST_SWAP(&freefrag->ff_jwork, &wkhd, worklist, wk_list);
	ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno,
-	    freefrag->ff_fragsize, freefrag->ff_inum);
+	    freefrag->ff_fragsize, freefrag->ff_inum, &wkhd);
	ACQUIRE_LOCK(&lk);
	WORKITEM_FREE(freefrag, D_FREEFRAG);
	FREE_LOCK(&lk);

@@ -1856,9 +4323,9 @@ handle_workitem_freefrag(freefrag)
 * See the description of softdep_setup_allocdirect above for details.
 */
void
-softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
+softdep_setup_allocext(ip, off, newblkno, oldblkno, newsize, oldsize, bp)
	struct inode *ip;
-	ufs_lbn_t lbn;
+	ufs_lbn_t off;
	ufs2_daddr_t newblkno;
	ufs2_daddr_t oldblkno;
	long newsize;
	long oldsize;
@@ -1867,50 +4334,55 @@ void
{
	struct allocdirect *adp, *oldadp;
	struct allocdirectlst *adphead;
-	struct bmsafemap *bmsafemap;
+	struct freefrag *freefrag;
	struct inodedep *inodedep;
+	struct jnewblk *jnewblk;
	struct newblk *newblk;
	struct mount *mp;
+	ufs_lbn_t lbn;

+	if (off >= NXADDR)
+		panic("softdep_setup_allocext: lbn %lld > NXADDR",
+		    (long long)off);
+
+	lbn = bp->b_lblkno;
	mp = UFSTOVFS(ip->i_ump);
-	adp = malloc(sizeof(struct allocdirect),
-	    M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO);
-	workitem_alloc(&adp->ad_list, D_ALLOCDIRECT, mp);
-	adp->ad_lbn = lbn;
-	adp->ad_newblkno = newblkno;
-	adp->ad_oldblkno = oldblkno;
-	adp->ad_newsize = newsize;
-	adp->ad_oldsize = oldsize;
-	adp->ad_state = ATTACHED | EXTDATA;
-	LIST_INIT(&adp->ad_newdirblk);
-	if (newblkno == oldblkno)
-		adp->ad_freefrag = NULL;
+	if (oldblkno && oldblkno != newblkno)
+		freefrag = newfreefrag(ip, oldblkno, oldsize, lbn);
	else
-		adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);
+		freefrag = NULL;
	ACQUIRE_LOCK(&lk);
-	if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
+	if (newblk_lookup(mp, newblkno, 0, &newblk) == 0)
		panic("softdep_setup_allocext: lost block");
+	KASSERT(newblk->nb_list.wk_type == D_NEWBLK,
+	    ("softdep_setup_allocext: newblk already initialized"));
+	/*
+	 * Convert the newblk to an allocdirect.
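+	 *
+	 * (Aside, ours.)  The conversion relies on struct allocdirect
+	 * embedding a struct newblk as its first member, so the record
+	 * is relabeled in place rather than reallocated:
+	 *
+	 *	newblk->nb_list.wk_type = D_ALLOCDIRECT;
+	 *	adp = (struct allocdirect *)newblk;
+	 *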
+ */ + newblk->nb_list.wk_type = D_ALLOCDIRECT; + adp = (struct allocdirect *)newblk; + newblk->nb_freefrag = freefrag; + adp->ad_offset = off; + adp->ad_oldblkno = oldblkno; + adp->ad_newsize = newsize; + adp->ad_oldsize = oldsize; + adp->ad_state |= EXTDATA; + /* + * Finish initializing the journal. + */ + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = lbn; + add_to_journal(&jnewblk->jn_list); + } + if (freefrag && freefrag->ff_jfreefrag != NULL) + add_to_journal(&freefrag->ff_jfreefrag->fr_list); inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep); adp->ad_inodedep = inodedep; - if (newblk->nb_state == DEPCOMPLETE) { - adp->ad_state |= DEPCOMPLETE; - adp->ad_buf = NULL; - } else { - bmsafemap = newblk->nb_bmsafemap; - adp->ad_buf = bmsafemap->sm_buf; - LIST_REMOVE(newblk, nb_deps); - LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps); - } - LIST_REMOVE(newblk, nb_hash); - free(newblk, M_NEWBLK); - - WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); - if (lbn >= NXADDR) - panic("softdep_setup_allocext: lbn %lld > NXADDR", - (long long)lbn); + WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list); /* * The list of allocdirects must be kept in sorted and ascending * order so that the rollback routines can quickly determine the @@ -1925,23 +4397,23 @@ void */ adphead = &inodedep->id_newextupdt; oldadp = TAILQ_LAST(adphead, allocdirectlst); - if (oldadp == NULL || oldadp->ad_lbn <= lbn) { + if (oldadp == NULL || oldadp->ad_offset <= off) { /* insert at end of list */ TAILQ_INSERT_TAIL(adphead, adp, ad_next); - if (oldadp != NULL && oldadp->ad_lbn == lbn) + if (oldadp != NULL && oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); return; } TAILQ_FOREACH(oldadp, adphead, ad_next) { - if (oldadp->ad_lbn >= lbn) + if (oldadp->ad_offset >= off) break; } if (oldadp == NULL) panic("softdep_setup_allocext: lost entry"); /* insert in middle of list */ TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); - if (oldadp->ad_lbn == lbn) + if (oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); } @@ -1975,22 +4447,39 @@ void * Allocate a new allocindir structure. 
*/ static struct allocindir * -newallocindir(ip, ptrno, newblkno, oldblkno) +newallocindir(ip, ptrno, newblkno, oldblkno, lbn) struct inode *ip; /* inode for file being extended */ int ptrno; /* offset of pointer in indirect block */ ufs2_daddr_t newblkno; /* disk block number being added */ ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ + ufs_lbn_t lbn; { + struct newblk *newblk; struct allocindir *aip; + struct freefrag *freefrag; + struct jnewblk *jnewblk; - aip = malloc(sizeof(struct allocindir), - M_ALLOCINDIR, M_SOFTDEP_FLAGS|M_ZERO); - workitem_alloc(&aip->ai_list, D_ALLOCINDIR, UFSTOVFS(ip->i_ump)); - aip->ai_state = ATTACHED; + if (oldblkno) + freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize, lbn); + else + freefrag = NULL; + ACQUIRE_LOCK(&lk); + if (newblk_lookup(UFSTOVFS(ip->i_ump), newblkno, 0, &newblk) == 0) + panic("new_allocindir: lost block"); + KASSERT(newblk->nb_list.wk_type == D_NEWBLK, + ("newallocindir: newblk already initialized")); + newblk->nb_list.wk_type = D_ALLOCINDIR; + newblk->nb_freefrag = freefrag; + aip = (struct allocindir *)newblk; aip->ai_offset = ptrno; - aip->ai_newblkno = newblkno; aip->ai_oldblkno = oldblkno; - aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize); + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = lbn; + add_to_journal(&jnewblk->jn_list); + } + if (freefrag && freefrag->ff_jfreefrag != NULL) + add_to_journal(&freefrag->ff_jfreefrag->fr_list); return (aip); } @@ -2008,22 +4497,28 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ struct buf *nbp; /* buffer holding allocated page */ { + struct inodedep *inodedep; struct allocindir *aip; struct pagedep *pagedep; + struct mount *mp; + if (lbn != nbp->b_lblkno) + panic("softdep_setup_allocindir_page: lbn %jd != lblkno %jd", + lbn, bp->b_lblkno); ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page"); - aip = newallocindir(ip, ptrno, newblkno, oldblkno); - ACQUIRE_LOCK(&lk); + mp = UFSTOVFS(ip->i_ump); + aip = newallocindir(ip, ptrno, newblkno, oldblkno, lbn); + (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); /* * If we are allocating a directory page, then we must * allocate an associated pagedep to track additions and * deletions. 
*/ if ((ip->i_mode & IFMT) == IFDIR && - pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) + pagedep_lookup(mp, ip->i_number, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - setup_allocindir_phase2(bp, ip, aip); + WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); + setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); FREE_LOCK(&lk); } @@ -2039,38 +4534,68 @@ softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, int ptrno; /* offset of pointer in indirect block */ ufs2_daddr_t newblkno; /* disk block number being added */ { + struct inodedep *inodedep; struct allocindir *aip; + ufs_lbn_t lbn; + lbn = nbp->b_lblkno; ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta"); - aip = newallocindir(ip, ptrno, newblkno, 0); - ACQUIRE_LOCK(&lk); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - setup_allocindir_phase2(bp, ip, aip); + aip = newallocindir(ip, ptrno, newblkno, 0, lbn); + inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC, &inodedep); + WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); + setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); FREE_LOCK(&lk); } +static void +indirdep_complete(indirdep) + struct indirdep *indirdep; +{ + struct allocindir *aip; + + LIST_REMOVE(indirdep, ir_next); + indirdep->ir_state &= ~ONDEPLIST; + + while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) { + LIST_REMOVE(aip, ai_next); + free_newblk(&aip->ai_block); + } + /* + * If this indirdep is not attached to a buf it was simply waiting + * on completion to clear completehd. free_indirdep() asserts + * that nothing is dangling. + */ + if ((indirdep->ir_state & ONWORKLIST) == 0) + free_indirdep(indirdep); +} + /* * Called to finish the allocation of the "aip" allocated * by one of the two routines above. */ static void -setup_allocindir_phase2(bp, ip, aip) +setup_allocindir_phase2(bp, ip, inodedep, aip, lbn) struct buf *bp; /* in-memory copy of the indirect block */ struct inode *ip; /* inode for file being extended */ + struct inodedep *inodedep; /* Inodedep for ip */ struct allocindir *aip; /* allocindir allocated by the above routines */ + ufs_lbn_t lbn; /* Logical block number for this block. 
*/ { struct worklist *wk; + struct fs *fs; + struct newblk *newblk; struct indirdep *indirdep, *newindirdep; - struct bmsafemap *bmsafemap; struct allocindir *oldaip; struct freefrag *freefrag; - struct newblk *newblk; + struct mount *mp; ufs2_daddr_t blkno; + mp = UFSTOVFS(ip->i_ump); + fs = ip->i_fs; mtx_assert(&lk, MA_OWNED); if (bp->b_lblkno >= 0) panic("setup_allocindir_phase2: not indir blk"); - for (indirdep = NULL, newindirdep = NULL; ; ) { + for (freefrag = NULL, indirdep = NULL, newindirdep = NULL; ; ) { LIST_FOREACH(wk, &bp->b_dep, wk_list) { if (wk->wk_type != D_INDIRDEP) continue; @@ -2079,49 +4604,41 @@ static void } if (indirdep == NULL && newindirdep) { indirdep = newindirdep; + newindirdep = NULL; WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); - newindirdep = NULL; + if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, + &newblk)) { + indirdep->ir_state |= ONDEPLIST; + LIST_INSERT_HEAD(&newblk->nb_indirdeps, + indirdep, ir_next); + } else + indirdep->ir_state |= DEPCOMPLETE; } if (indirdep) { - if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0, - &newblk) == 0) - panic("setup_allocindir: lost block"); - if (newblk->nb_state == DEPCOMPLETE) { - aip->ai_state |= DEPCOMPLETE; - aip->ai_buf = NULL; - } else { - bmsafemap = newblk->nb_bmsafemap; - aip->ai_buf = bmsafemap->sm_buf; - LIST_REMOVE(newblk, nb_deps); - LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd, - aip, ai_deps); - } - LIST_REMOVE(newblk, nb_hash); - free(newblk, M_NEWBLK); aip->ai_indirdep = indirdep; /* * Check to see if there is an existing dependency * for this block. If there is, merge the old - * dependency into the new one. + * dependency into the new one. This happens + * as a result of reallocblk only. */ if (aip->ai_oldblkno == 0) oldaip = NULL; else - LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next) + LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, + ai_next) if (oldaip->ai_offset == aip->ai_offset) break; - freefrag = NULL; - if (oldaip != NULL) { - if (oldaip->ai_newblkno != aip->ai_oldblkno) - panic("setup_allocindir_phase2: blkno"); - aip->ai_oldblkno = oldaip->ai_oldblkno; - freefrag = aip->ai_freefrag; - aip->ai_freefrag = oldaip->ai_freefrag; - oldaip->ai_freefrag = NULL; - free_allocindir(oldaip, NULL); - } + if (oldaip != NULL) + freefrag = allocindir_merge(aip, oldaip); LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); + KASSERT(aip->ai_offset >= 0 && + aip->ai_offset < NINDIR(ip->i_ump->um_fs), + ("setup_allocindir_phase2: Bad offset %d", + aip->ai_offset)); + KASSERT(indirdep->ir_savebp != NULL, + ("setup_allocindir_phase2 NULL ir_savebp")); if (ip->i_ump->um_fstype == UFS1) ((ufs1_daddr_t *)indirdep->ir_savebp->b_data) [aip->ai_offset] = aip->ai_oldblkno; @@ -2148,13 +4665,16 @@ static void } newindirdep = malloc(sizeof(struct indirdep), M_INDIRDEP, M_SOFTDEP_FLAGS); - workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, - UFSTOVFS(ip->i_ump)); + workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, mp); newindirdep->ir_state = ATTACHED; if (ip->i_ump->um_fstype == UFS1) newindirdep->ir_state |= UFS1FMT; + newindirdep->ir_saveddata = NULL; LIST_INIT(&newindirdep->ir_deplisthd); LIST_INIT(&newindirdep->ir_donehd); + LIST_INIT(&newindirdep->ir_writehd); + LIST_INIT(&newindirdep->ir_completehd); + LIST_INIT(&newindirdep->ir_jwork); if (bp->b_blkno == bp->b_lblkno) { ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp, NULL, NULL); @@ -2169,6 +4689,51 @@ static void } /* + * Merge two allocindirs which refer to the same block. Move newblock + * dependencies and setup the freefrags appropriately. 
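+ *
+ * (Illustrative values, ours.)  A merge arises when reallocblk moves
+ * logical slot N from block A to block B before A's dependency has
+ * cleared, so only one rollback entry per slot survives:
+ *
+ *	old aip:  ai_offset = N, ai_newblkno = A, ai_oldblkno = 0
+ *	new aip:  ai_offset = N, ai_newblkno = B, ai_oldblkno = A
+ *	merged:   ai_offset = N, ai_newblkno = B, ai_oldblkno = 0
+ *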
+ */ +static struct freefrag * +allocindir_merge(aip, oldaip) + struct allocindir *aip; + struct allocindir *oldaip; +{ + struct newdirblk *newdirblk; + struct freefrag *freefrag; + struct worklist *wk; + + if (oldaip->ai_newblkno != aip->ai_oldblkno) + panic("allocindir_merge: blkno"); + aip->ai_oldblkno = oldaip->ai_oldblkno; + freefrag = aip->ai_freefrag; + aip->ai_freefrag = oldaip->ai_freefrag; + oldaip->ai_freefrag = NULL; + KASSERT(freefrag != NULL, ("setup_allocindir_phase2: No freefrag")); + /* + * If we are tracking a new directory-block allocation, + * move it from the old allocindir to the new allocindir. + */ + if ((wk = LIST_FIRST(&oldaip->ai_newdirblk)) != NULL) { + newdirblk = WK_NEWDIRBLK(wk); + WORKLIST_REMOVE(&newdirblk->db_list); + if (!LIST_EMPTY(&oldaip->ai_newdirblk)) + panic("allocindir_merge: extra newdirblk"); + WORKLIST_INSERT(&aip->ai_newdirblk, &newdirblk->db_list); + } + /* + * We can skip journaling for this freefrag and just complete + * any pending journal work for the allocindir that is being + * removed after the freefrag completes. + */ + if (freefrag->ff_jfreefrag) + cancel_jfreefrag(freefrag->ff_jfreefrag); + LIST_REMOVE(oldaip, ai_next); + cancel_newblk(&oldaip->ai_block, &freefrag->ff_jwork); + free_newblk(&oldaip->ai_block); + + return (freefrag); +} + +/* * Block de-allocation dependencies. * * When blocks are de-allocated, the on-disk pointers must be nullified before @@ -2206,6 +4771,7 @@ softdep_setup_freeblocks(ip, length, flags) struct freeblks *freeblks; struct inodedep *inodedep; struct allocdirect *adp; + struct jfreeblk *jfreeblk; struct bufobj *bo; struct vnode *vp; struct buf *bp; @@ -2213,6 +4779,13 @@ softdep_setup_freeblocks(ip, length, flags) ufs2_daddr_t extblocks, datablocks; struct mount *mp; int i, delay, error; + ufs2_daddr_t blkno; + ufs_lbn_t tmpval; + ufs_lbn_t lbn; + long oldextsize; + long oldsize; + int frags; + int needj; fs = ip->i_fs; mp = UFSTOVFS(ip->i_ump); @@ -2221,32 +4794,53 @@ softdep_setup_freeblocks(ip, length, flags) freeblks = malloc(sizeof(struct freeblks), M_FREEBLKS, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&freeblks->fb_list, D_FREEBLKS, mp); + LIST_INIT(&freeblks->fb_jfreeblkhd); + LIST_INIT(&freeblks->fb_jwork); freeblks->fb_state = ATTACHED; freeblks->fb_uid = ip->i_uid; freeblks->fb_previousinum = ip->i_number; freeblks->fb_devvp = ip->i_devvp; + freeblks->fb_chkcnt = 0; ACQUIRE_LOCK(&lk); + /* + * If we're truncating a removed file that will never be written + * we don't need to journal the block frees. The canceled journals + * for the allocations will suffice. 
+ */
+	inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
+	if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED ||
+	    (fs->fs_flags & FS_SUJ) == 0)
+		needj = 0;
+	else
+		needj = 1;
	num_freeblkdep++;
	FREE_LOCK(&lk);
	extblocks = 0;
	if (fs->fs_magic == FS_UFS2_MAGIC)
		extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
	datablocks = DIP(ip, i_blocks) - extblocks;
-	if ((flags & IO_NORMAL) == 0) {
-		freeblks->fb_oldsize = 0;
-		freeblks->fb_chkcnt = 0;
-	} else {
-		freeblks->fb_oldsize = ip->i_size;
+	if ((flags & IO_NORMAL) != 0) {
+		oldsize = ip->i_size;
		ip->i_size = 0;
		DIP_SET(ip, i_size, 0);
		freeblks->fb_chkcnt = datablocks;
		for (i = 0; i < NDADDR; i++) {
-			freeblks->fb_dblks[i] = DIP(ip, i_db[i]);
+			blkno = DIP(ip, i_db[i]);
			DIP_SET(ip, i_db[i], 0);
+			if (blkno == 0)
+				continue;
+			frags = sblksize(fs, oldsize, i);
+			frags = numfrags(fs, frags);
+			newfreework(freeblks, NULL, i, blkno, frags, needj);
		}
-		for (i = 0; i < NIADDR; i++) {
-			freeblks->fb_iblks[i] = DIP(ip, i_ib[i]);
+		for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR;
+		    i++, tmpval *= NINDIR(fs)) {
+			blkno = DIP(ip, i_ib[i]);
			DIP_SET(ip, i_ib[i], 0);
+			if (blkno)
+				newfreework(freeblks, NULL, -lbn - i, blkno,
+				    fs->fs_frag, needj);
+			lbn += tmpval;
		}
		/*
		 * If the file was removed, then the space being freed was
@@ -2259,17 +4853,23 @@ softdep_setup_freeblocks(ip, length, flags)
			UFS_UNLOCK(ip->i_ump);
		}
	}
-	if ((flags & IO_EXT) == 0) {
-		freeblks->fb_oldextsize = 0;
-	} else {
-		freeblks->fb_oldextsize = ip->i_din2->di_extsize;
+	if ((flags & IO_EXT) != 0) {
+		oldextsize = ip->i_din2->di_extsize;
		ip->i_din2->di_extsize = 0;
		freeblks->fb_chkcnt += extblocks;
		for (i = 0; i < NXADDR; i++) {
-			freeblks->fb_eblks[i] = ip->i_din2->di_extb[i];
+			blkno = ip->i_din2->di_extb[i];
			ip->i_din2->di_extb[i] = 0;
+			if (blkno == 0)
+				continue;
+			frags = sblksize(fs, oldextsize, i);
+			frags = numfrags(fs, frags);
+			newfreework(freeblks, NULL, -1 - i, blkno, frags,
+			    needj);
		}
	}
+	if (LIST_EMPTY(&freeblks->fb_jfreeblkhd))
+		needj = 0;
	DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - freeblks->fb_chkcnt);
	/*
	 * Push the zero'ed inode to its disk buffer so that we are free
@@ -2304,7 +4904,9 @@ softdep_setup_freeblocks(ip, length, flags)
	 */
	delay = (inodedep->id_state & DEPCOMPLETE);
	if (delay)
-		WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
+		WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list);
+	else if (needj)
+		freeblks->fb_state |= DEPCOMPLETE | COMPLETE;
	/*
	 * Because the file length has been truncated to zero, any
	 * pending block allocation dependency structures associated
@@ -2318,14 +4920,19 @@ softdep_setup_freeblocks(ip, length, flags)
		merge_inode_lists(&inodedep->id_newinoupdt,
		    &inodedep->id_inoupdt);
		while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
-			free_allocdirect(&inodedep->id_inoupdt, adp, delay);
+			cancel_allocdirect(&inodedep->id_inoupdt, adp,
+			    freeblks, delay);
	}
	if (flags & IO_EXT) {
		merge_inode_lists(&inodedep->id_newextupdt,
		    &inodedep->id_extupdt);
		while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0)
-			free_allocdirect(&inodedep->id_extupdt, adp, delay);
+			cancel_allocdirect(&inodedep->id_extupdt, adp,
+			    freeblks, delay);
	}
+	LIST_FOREACH(jfreeblk, &freeblks->fb_jfreeblkhd, jf_deps)
+		add_to_journal(&jfreeblk->jf_list);
+
	FREE_LOCK(&lk);
	bdwrite(bp);
	/*
@@ -2349,9 +4956,9 @@ restart:
		BO_UNLOCK(bo);
		ACQUIRE_LOCK(&lk);
		(void) inodedep_lookup(mp, ip->i_number, 0, &inodedep);
-		deallocate_dependencies(bp, inodedep);
+		if (deallocate_dependencies(bp, inodedep, freeblks))
+			bp->b_flags |=
B_INVAL | B_NOCACHE; FREE_LOCK(&lk); - bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); BO_LOCK(bo); goto restart; @@ -2361,7 +4968,7 @@ restart: if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); - if(delay) { + if (delay) { freeblks->fb_state |= DEPCOMPLETE; /* * If the inode with zeroed block pointers is now on disk @@ -2371,16 +4978,16 @@ restart: * the request here than in the !delay case. */ if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) - add_to_worklist(&freeblks->fb_list); + add_to_worklist(&freeblks->fb_list, 1); } FREE_LOCK(&lk); /* - * If the inode has never been written to disk (delay == 0), - * then we can process the freeblks now that we have deleted - * the dependencies. + * If the inode has never been written to disk (delay == 0) and + * we're not waiting on any journal writes, then we can process the + * freeblks now that we have deleted the dependencies. */ - if (!delay) + if (!delay && !needj) handle_workitem_freeblocks(freeblks, 0); } @@ -2389,19 +4996,23 @@ restart: * be reallocated to a new vnode. The buffer must be locked, thus, * no I/O completion operations can occur while we are manipulating * its associated dependencies. The mutex is held so that other I/O's - * associated with related dependencies do not occur. + * associated with related dependencies do not occur. Returns 1 if + * all dependencies were cleared, 0 otherwise. */ -static void -deallocate_dependencies(bp, inodedep) +static int +deallocate_dependencies(bp, inodedep, freeblks) struct buf *bp; struct inodedep *inodedep; + struct freeblks *freeblks; { struct worklist *wk; struct indirdep *indirdep; + struct newdirblk *newdirblk; struct allocindir *aip; struct pagedep *pagedep; + struct jremref *jremref; + struct jmvref *jmvref; struct dirrem *dirrem; - struct diradd *dap; int i; mtx_assert(&lk, MA_OWNED); @@ -2410,47 +5021,24 @@ restart: case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); - /* - * None of the indirect pointers will ever be visible, - * so they can simply be tossed. GOINGAWAY ensures - * that allocated pointers will be saved in the buffer - * cache until they are freed. Note that they will - * only be able to be found by their physical address - * since the inode mapping the logical address will - * be gone. The save buffer used for the safe copy - * was allocated in setup_allocindir_phase2 using - * the physical address so it could be used for this - * purpose. Hence we swap the safe copy with the real - * copy, allowing the safe copy to be freed and holding - * on to the real copy for later use in indir_trunc. - */ - if (indirdep->ir_state & GOINGAWAY) - panic("deallocate_dependencies: already gone"); - indirdep->ir_state |= GOINGAWAY; - VFSTOUFS(bp->b_vp->v_mount)->um_numindirdeps += 1; - while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0) - free_allocindir(aip, inodedep); if (bp->b_lblkno >= 0 || bp->b_blkno != indirdep->ir_savebp->b_lblkno) panic("deallocate_dependencies: not indir"); - bcopy(bp->b_data, indirdep->ir_savebp->b_data, - bp->b_bcount); - WORKLIST_REMOVE(wk); - WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk); + cancel_indirdep(indirdep, bp, inodedep, freeblks); continue; case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); /* - * None of the directory additions will ever be - * visible, so they can simply be tossed. + * There should be no directory add dependencies present + * as the directory could not be truncated until all + * children were removed. 
*/ + KASSERT(LIST_FIRST(&pagedep->pd_pendinghd) == NULL, + ("deallocate_dependencies: pendinghd != NULL")); for (i = 0; i < DAHASHSZ; i++) - while ((dap = - LIST_FIRST(&pagedep->pd_diraddhd[i]))) - free_diradd(dap); - while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != 0) - free_diradd(dap); + KASSERT(LIST_FIRST(&pagedep->pd_diraddhd[i]) == NULL, + ("deallocate_dependencies: diraddhd != NULL")); /* * Copy any directory remove dependencies to the list * to be processed after the zero'ed inode is written. @@ -2458,36 +5046,47 @@ restart: * can be dumped directly onto the work list. */ LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) { + /* + * If there are any dirrems we wait for + * the journal write to complete and + * then restart the buf scan as the lock + * has been dropped. + */ + while ((jremref = + LIST_FIRST(&dirrem->dm_jremrefhd)) + != NULL) { + jwait(&jremref->jr_list); + return (0); + } LIST_REMOVE(dirrem, dm_next); dirrem->dm_dirinum = pagedep->pd_ino; if (inodedep == NULL || (inodedep->id_state & ALLCOMPLETE) == - ALLCOMPLETE) - add_to_worklist(&dirrem->dm_list); - else + ALLCOMPLETE) { + dirrem->dm_state |= COMPLETE; + add_to_worklist(&dirrem->dm_list, 0); + } else WORKLIST_INSERT(&inodedep->id_bufwait, &dirrem->dm_list); } if ((pagedep->pd_state & NEWBLOCK) != 0) { - LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list) - if (wk->wk_type == D_NEWDIRBLK && - WK_NEWDIRBLK(wk)->db_pagedep == - pagedep) - break; - if (wk != NULL) { - WORKLIST_REMOVE(wk); - free_newdirblk(WK_NEWDIRBLK(wk)); - } else - panic("deallocate_dependencies: " - "lost pagedep"); + newdirblk = pagedep->pd_newdirblk; + WORKLIST_REMOVE(&newdirblk->db_list); + free_newdirblk(newdirblk); } + while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) + != NULL) { + jwait(&jmvref->jm_list); + return (0); + } WORKLIST_REMOVE(&pagedep->pd_list); LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); continue; case D_ALLOCINDIR: - free_allocindir(WK_ALLOCINDIR(wk), inodedep); + aip = WK_ALLOCINDIR(wk); + cancel_allocindir(aip, inodedep, freeblks); continue; case D_ALLOCDIRECT: @@ -2502,46 +5101,155 @@ restart: /* NOTREACHED */ } } + + return (1); } /* - * Free an allocdirect. Generate a new freefrag work request if appropriate. - * This routine must be called with splbio interrupts blocked. + * An allocdirect is being canceled due to a truncate. We must make sure + * the journal entry is released in concert with the blkfree that releases + * the storage. Completed journal entries must not be released until the + * space is no longer pointed to by the inode or in the bitmap. */ static void -free_allocdirect(adphead, adp, delay) +cancel_allocdirect(adphead, adp, freeblks, delay) struct allocdirectlst *adphead; struct allocdirect *adp; + struct freeblks *freeblks; int delay; { + struct freework *freework; + struct newblk *newblk; + struct worklist *wk; + ufs_lbn_t lbn; + + TAILQ_REMOVE(adphead, adp, ad_next); + newblk = (struct newblk *)adp; + /* + * If the journal hasn't been written the jnewblk must be passed + * to the call to ffs_freeblk that reclaims the space. We accomplish + * this by linking the journal dependency into the freework to be + * freed when freework_freeblock() is called. If the journal has + * been written we can simply reclaim the journal space when the + * freeblks work is complete. 
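+ *
+ * (Condensed sketch, ours, of the lookup below.)  A still-pending
+ * jnewblk is attached to the freework covering the same lbn;
+ * otherwise the journal space is reclaimed with the freeblks:
+ *
+ *	if (newblk->nb_jnewblk == NULL)
+ *		cancel_newblk(newblk, &freeblks->fb_jwork);
+ *	else {
+ *		(find the freework with fw_lbn == jn_lbn)
+ *		cancel_newblk(newblk, &freework->fw_jwork);
+ *	}
+ *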
+ */ + if (newblk->nb_jnewblk == NULL) { + cancel_newblk(newblk, &freeblks->fb_jwork); + goto found; + } + lbn = newblk->nb_jnewblk->jn_lbn; + /* + * Find the correct freework structure so it releases the canceled + * journal when the bitmap is cleared. This preserves rollback + * until the allocation is reverted. + */ + LIST_FOREACH(wk, &freeblks->fb_freeworkhd, wk_list) { + freework = WK_FREEWORK(wk); + if (freework->fw_lbn != lbn) + continue; + cancel_newblk(newblk, &freework->fw_jwork); + goto found; + } + panic("cancel_allocdirect: Freework not found for lbn %jd\n", lbn); +found: + if (delay) + WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, + &newblk->nb_list); + else + free_newblk(newblk); + return; +} + + +static void +cancel_newblk(newblk, wkhd) + struct newblk *newblk; + struct workhead *wkhd; +{ + struct indirdep *indirdep; + struct allocindir *aip; + + while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) { + indirdep->ir_state &= ~ONDEPLIST; + LIST_REMOVE(indirdep, ir_next); + /* + * If an indirdep is not on the buf worklist we need to + * free it here as deallocate_dependencies() will never + * find it. These pointers were never visible on disk and + * can be discarded immediately. + */ + while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) { + LIST_REMOVE(aip, ai_next); + cancel_newblk(&aip->ai_block, wkhd); + free_newblk(&aip->ai_block); + } + /* + * If this indirdep is not attached to a buf it was simply + * waiting on completion to clear completehd. free_indirdep() + * asserts that nothing is dangling. + */ + if ((indirdep->ir_state & ONWORKLIST) == 0) + free_indirdep(indirdep); + } + if (newblk->nb_state & ONDEPLIST) { + newblk->nb_state &= ~ONDEPLIST; + LIST_REMOVE(newblk, nb_deps); + } + if (newblk->nb_state & ONWORKLIST) + WORKLIST_REMOVE(&newblk->nb_list); + /* + * If the journal entry hasn't been written we hold onto the dep + * until it is safe to free along with the other journal work. + */ + if (newblk->nb_jnewblk != NULL) { + cancel_jnewblk(newblk->nb_jnewblk, wkhd); + newblk->nb_jnewblk = NULL; + } + if (!LIST_EMPTY(&newblk->nb_jwork)) + jwork_move(wkhd, &newblk->nb_jwork); +} + +/* + * Free a newblk. Generate a new freefrag work request if appropriate. + * This must be called after the inode pointer and any direct block pointers + * are valid or fully removed via truncate or frag extension. 
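+ *
+ * (Aside, ours.)  Note the two-flag gate used below before queueing
+ * the attached freefrag; it only fires once both the journal entry
+ * and the newblk itself are complete:
+ *
+ *	freefrag->ff_state |= COMPLETE;
+ *	if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE)
+ *		add_to_worklist(&freefrag->ff_list, 0);
+ *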
+ */ +static void +free_newblk(newblk) + struct newblk *newblk; +{ + struct indirdep *indirdep; struct newdirblk *newdirblk; + struct freefrag *freefrag; struct worklist *wk; mtx_assert(&lk, MA_OWNED); - if ((adp->ad_state & DEPCOMPLETE) == 0) - LIST_REMOVE(adp, ad_deps); - TAILQ_REMOVE(adphead, adp, ad_next); - if ((adp->ad_state & COMPLETE) == 0) - WORKLIST_REMOVE(&adp->ad_list); - if (adp->ad_freefrag != NULL) { - if (delay) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, - &adp->ad_freefrag->ff_list); - else - add_to_worklist(&adp->ad_freefrag->ff_list); + if (newblk->nb_state & ONDEPLIST) + LIST_REMOVE(newblk, nb_deps); + if (newblk->nb_state & ONWORKLIST) + WORKLIST_REMOVE(&newblk->nb_list); + LIST_REMOVE(newblk, nb_hash); + if ((freefrag = newblk->nb_freefrag) != NULL) { + freefrag->ff_state |= COMPLETE; + if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(&freefrag->ff_list, 0); } - if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) { + if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) { newdirblk = WK_NEWDIRBLK(wk); WORKLIST_REMOVE(&newdirblk->db_list); - if (!LIST_EMPTY(&adp->ad_newdirblk)) - panic("free_allocdirect: extra newdirblk"); - if (delay) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, - &newdirblk->db_list); - else - free_newdirblk(newdirblk); + if (!LIST_EMPTY(&newblk->nb_newdirblk)) + panic("free_newblk: extra newdirblk"); + free_newdirblk(newdirblk); } - WORKITEM_FREE(adp, D_ALLOCDIRECT); + while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) { + indirdep->ir_state |= DEPCOMPLETE; + indirdep_complete(indirdep); + } + KASSERT(newblk->nb_jnewblk == NULL, + ("free_newblk; jnewblk %p still attached", newblk->nb_jnewblk)); + handle_jwork(&newblk->nb_jwork); + newblk->nb_list.wk_type = D_NEWBLK; + WORKITEM_FREE(newblk, D_NEWBLK); } /* @@ -2554,6 +5262,7 @@ free_newdirblk(newdirblk) { struct pagedep *pagedep; struct diradd *dap; + struct worklist *wk; int i; mtx_assert(&lk, MA_OWNED); @@ -2571,17 +5280,25 @@ free_newdirblk(newdirblk) pagedep->pd_state &= ~NEWBLOCK; if ((pagedep->pd_state & ONWORKLIST) == 0) while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) - free_diradd(dap); + free_diradd(dap, NULL); /* * If no dependencies remain, the pagedep will be freed. */ for (i = 0; i < DAHASHSZ; i++) if (!LIST_EMPTY(&pagedep->pd_diraddhd[i])) break; - if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) { + if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0 && + LIST_EMPTY(&pagedep->pd_jmvrefhd)) { + KASSERT(LIST_FIRST(&pagedep->pd_dirremhd) == NULL, + ("free_newdirblk: Freeing non-free pagedep %p", pagedep)); LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); } + /* Should only ever be one item in the list. */ + while ((wk = LIST_FIRST(&newdirblk->db_mkdir)) != NULL) { + WORKLIST_REMOVE(wk); + handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY); + } WORKITEM_FREE(newdirblk, D_NEWDIRBLK); } @@ -2608,6 +5325,7 @@ softdep_freefile(pvp, ino, mode) freefile->fx_mode = mode; freefile->fx_oldinum = ino; freefile->fx_devvp = ip->i_devvp; + LIST_INIT(&freefile->fx_jwork); if ((ip->i_flag & IN_SPACECOUNTED) == 0) { UFS_LOCK(ip->i_ump); ip->i_fs->fs_pendinginodes += 1; @@ -2618,11 +5336,29 @@ softdep_freefile(pvp, ino, mode) * If the inodedep does not exist, then the zero'ed inode has * been written to disk. If the allocated inode has never been * written to disk, then the on-disk inode is zero'ed. In either - * case we can free the file immediately. + * case we can free the file immediately. 
If the journal was
 * canceled before being written, the inode will never make it to
 * disk and we must send the canceled journal entries to
 * ffs_freefile() to be cleared in conjunction with the bitmap.
 * Any blocks waiting on the inode to write can be safely freed
 * here as it will never be written.
 */
	ACQUIRE_LOCK(&lk);
-	if (inodedep_lookup(pvp->v_mount, ino, 0, &inodedep) == 0 ||
-	    check_inode_unwritten(inodedep)) {
+	inodedep_lookup(pvp->v_mount, ino, 0, &inodedep);
+	/*
+	 * Remove this inode from the unlinked list and set
+	 * GOINGAWAY as appropriate to indicate that this inode
+	 * will never be written.
+	 */
+	if (inodedep && inodedep->id_state & UNLINKED) {
+		clear_unlinked_inodedep(inodedep);
+		inodedep_lookup(pvp->v_mount, ino, 0, &inodedep);
+		if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) {
+			inodedep->id_state |= GOINGAWAY;
+			handle_bufwait(inodedep, &freefile->fx_jwork);
+		}
+	}
+	if (inodedep == NULL || check_inode_unwritten(inodedep)) {
		FREE_LOCK(&lk);
		handle_workitem_freefile(freefile);
		return;

@@ -2654,7 +5390,8 @@ check_inode_unwritten(inodedep)
{
	mtx_assert(&lk, MA_OWNED);
-	if ((inodedep->id_state & DEPCOMPLETE) != 0 ||
+
+	if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) != 0 ||
	    !LIST_EMPTY(&inodedep->id_pendinghd) ||
	    !LIST_EMPTY(&inodedep->id_bufwait) ||
	    !LIST_EMPTY(&inodedep->id_inowait) ||
@@ -2662,9 +5399,9 @@
	    !TAILQ_EMPTY(&inodedep->id_newinoupdt) ||
	    !TAILQ_EMPTY(&inodedep->id_extupdt) ||
	    !TAILQ_EMPTY(&inodedep->id_newextupdt) ||
+	    inodedep->id_mkdiradd != NULL ||
	    inodedep->id_nlinkdelta != 0)
		return (0);
-
	/*
	 * Another process might be in initiate_write_inodeblock_ufs[12]
	 * trying to allocate memory without holding "Softdep Lock".
@@ -2673,9 +5410,11 @@ check_inode_unwritten(inodedep)
	    inodedep->id_savedino1 == NULL)
		return (0);

+	if (inodedep->id_state & ONDEPLIST)
+		LIST_REMOVE(inodedep, id_deps);
+	inodedep->id_state &= ~ONDEPLIST;
	inodedep->id_state |= ALLCOMPLETE;
-	LIST_REMOVE(inodedep, id_deps);
-	inodedep->id_buf = NULL;
+	inodedep->id_bmsafemap = NULL;
	if (inodedep->id_state & ONWORKLIST)
		WORKLIST_REMOVE(&inodedep->id_list);
	if (inodedep->id_savedino1 != NULL) {
@@ -2696,17 +5435,23 @@ free_inodedep(inodedep)
{
	mtx_assert(&lk, MA_OWNED);
-	if ((inodedep->id_state & ONWORKLIST) != 0 ||
+	if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 ||
	    (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
+	    !LIST_EMPTY(&inodedep->id_dirremhd) ||
	    !LIST_EMPTY(&inodedep->id_pendinghd) ||
	    !LIST_EMPTY(&inodedep->id_bufwait) ||
	    !LIST_EMPTY(&inodedep->id_inowait) ||
+	    !TAILQ_EMPTY(&inodedep->id_inoreflst) ||
	    !TAILQ_EMPTY(&inodedep->id_inoupdt) ||
	    !TAILQ_EMPTY(&inodedep->id_newinoupdt) ||
	    !TAILQ_EMPTY(&inodedep->id_extupdt) ||
	    !TAILQ_EMPTY(&inodedep->id_newextupdt) ||
-	    inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL)
+	    inodedep->id_mkdiradd != NULL ||
+	    inodedep->id_nlinkdelta != 0 ||
+	    inodedep->id_savedino1 != NULL)
		return (0);
+	if (inodedep->id_state & ONDEPLIST)
+		LIST_REMOVE(inodedep, id_deps);
	LIST_REMOVE(inodedep, id_hash);
	WORKITEM_FREE(inodedep, D_INODEDEP);
	num_inodedep -= 1;
@@ -2714,6 +5459,123 @@ free_inodedep(inodedep)
}

/*
+ * Free the block referenced by a freework structure. The parent freeblks
+ * structure is released and completed when the final cg bitmap reaches
+ * the disk. This routine may be freeing a jnewblk which never made it to
+ * disk in which case we do not have to wait as the operation is undone
+ * in memory immediately.
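+ *
+ * (Our gloss of the body below.)  The work list handed to
+ * ffs_blkfree() carries either the canceled jnewblk or the freework
+ * itself, so completion is signalled by the cg bitmap write:
+ *
+ *	if (!LIST_EMPTY(&freework->fw_jwork))
+ *		LIST_SWAP(&wkhd, &freework->fw_jwork, worklist, wk_list);
+ *	else
+ *		WORKLIST_INSERT_UNLOCKED(&wkhd, &freework->fw_list);
+ *	...
+ *	ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno,
+ *	    bsize, freeblks->fb_previousinum, &wkhd);
+ *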
+ */ +static void +freework_freeblock(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct ufsmount *ump; + struct workhead wkhd; + struct fs *fs; + int complete; + int pending; + int bsize; + + freeblks = freework->fw_freeblks; + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + fs = ump->um_fs; + complete = 0; + LIST_INIT(&wkhd); + /* + * If we are canceling an existing jnewblk pass it to the free + * routine, otherwise pass the freeblk which will ultimately + * release the freeblks + */ + if (!LIST_EMPTY(&freework->fw_jwork)) { + LIST_SWAP(&wkhd, &freework->fw_jwork, worklist, wk_list); + complete = 1; + } else + WORKLIST_INSERT_UNLOCKED(&wkhd, &freework->fw_list); + bsize = lfragtosize(fs, freework->fw_frags); + pending = btodb(bsize); + ACQUIRE_LOCK(&lk); + freeblks->fb_chkcnt -= pending; + FREE_LOCK(&lk); + /* + * extattr blocks don't show up in pending blocks. XXX why? + */ + if (freework->fw_lbn >= 0 || freework->fw_lbn <= -NDADDR) { + UFS_LOCK(ump); + fs->fs_pendingblocks -= pending; + UFS_UNLOCK(ump); + } + ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, + bsize, freeblks->fb_previousinum, &wkhd); + if (complete == 0) + return; + /* + * The jnewblk will be discarded and the bits in the map never + * made it to disk. We can immediately free the freeblk. + */ + ACQUIRE_LOCK(&lk); + handle_written_freework(freework); + FREE_LOCK(&lk); +} + +/* + * Start, continue, or finish the process of freeing an indirect block tree. + * The free operation may be paused at any point with fw_off containing the + * offset to restart from. This enables us to implement some flow control + * for large truncates which may fan out and generate a huge number of + * dependencies. + */ +static void +handle_workitem_indirblk(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct ufsmount *ump; + struct fs *fs; + + + freeblks = freework->fw_freeblks; + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + fs = ump->um_fs; + if (freework->fw_off == NINDIR(fs)) + freework_freeblock(freework); + else + indir_trunc(freework, fsbtodb(fs, freework->fw_blkno), + freework->fw_lbn); +} + +/* + * Called when a freework structure attached to a cg buf is written. The + * ref on either the parent or the freeblks structure is released and + * either may be added to the worklist if it is the final ref. + */ +static void +handle_written_freework(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct freework *parent; + + freeblks = freework->fw_freeblks; + parent = freework->fw_parent; + if (parent) { + if (--parent->fw_ref != 0) + parent = NULL; + freeblks = NULL; + } else if (--freeblks->fb_ref != 0) + freeblks = NULL; + WORKITEM_FREE(freework, D_FREEWORK); + /* + * Don't delay these block frees or it takes an intolerable amount + * of time to process truncates and free their journal entries. + */ + if (freeblks) + add_to_worklist(&freeblks->fb_list, 1); + if (parent) + add_to_worklist(&parent->fw_list, 1); +} + +/* * This workitem routine performs the block de-allocation. * The workitem is added to the pending list after the updated * inode block has been written to disk. 
As mentioned above, @@ -2726,99 +5588,79 @@ handle_workitem_freeblocks(freeblks, flags) struct freeblks *freeblks; int flags; { + struct freework *freework; + struct worklist *wk; + + KASSERT(LIST_EMPTY(&freeblks->fb_jfreeblkhd), + ("handle_workitem_freeblocks: Journal entries not written.")); + if (LIST_EMPTY(&freeblks->fb_freeworkhd)) { + handle_complete_freeblocks(freeblks); + return; + } + freeblks->fb_ref++; + while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) { + KASSERT(wk->wk_type == D_FREEWORK, + ("handle_workitem_freeblocks: Unknown type %s", + TYPENAME(wk->wk_type))); + WORKLIST_REMOVE_UNLOCKED(wk); + freework = WK_FREEWORK(wk); + if (freework->fw_lbn <= -NDADDR) + handle_workitem_indirblk(freework); + else + freework_freeblock(freework); + } + ACQUIRE_LOCK(&lk); + if (--freeblks->fb_ref != 0) + freeblks = NULL; + FREE_LOCK(&lk); + if (freeblks) + handle_complete_freeblocks(freeblks); +} + +/* + * Once all of the freework workitems are complete we can retire the + * freeblocks dependency and any journal work awaiting completion. This + * can not be called until all other dependencies are stable on disk. + */ +static void +handle_complete_freeblocks(freeblks) + struct freeblks *freeblks; +{ struct inode *ip; struct vnode *vp; struct fs *fs; struct ufsmount *ump; - int i, nblocks, level, bsize; - ufs2_daddr_t bn, blocksreleased = 0; - int error, allerror = 0; - ufs_lbn_t baselbns[NIADDR], tmpval; - int fs_pendingblocks; + int flags; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; - fs_pendingblocks = 0; - tmpval = 1; - baselbns[0] = NDADDR; - for (i = 1; i < NIADDR; i++) { - tmpval *= NINDIR(fs); - baselbns[i] = baselbns[i - 1] + tmpval; - } - nblocks = btodb(fs->fs_bsize); - blocksreleased = 0; + flags = LK_NOWAIT; + /* - * Release all extended attribute blocks or frags. - */ - if (freeblks->fb_oldextsize > 0) { - for (i = (NXADDR - 1); i >= 0; i--) { - if ((bn = freeblks->fb_eblks[i]) == 0) - continue; - bsize = sblksize(fs, freeblks->fb_oldextsize, i); - ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, bsize, - freeblks->fb_previousinum); - blocksreleased += btodb(bsize); - } - } - /* - * Release all data blocks or frags. - */ - if (freeblks->fb_oldsize > 0) { - /* - * Indirect blocks first. - */ - for (level = (NIADDR - 1); level >= 0; level--) { - if ((bn = freeblks->fb_iblks[level]) == 0) - continue; - if ((error = indir_trunc(freeblks, fsbtodb(fs, bn), - level, baselbns[level], &blocksreleased)) != 0) - allerror = error; - ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, - fs->fs_bsize, freeblks->fb_previousinum); - fs_pendingblocks += nblocks; - blocksreleased += nblocks; - } - /* - * All direct blocks or frags. - */ - for (i = (NDADDR - 1); i >= 0; i--) { - if ((bn = freeblks->fb_dblks[i]) == 0) - continue; - bsize = sblksize(fs, freeblks->fb_oldsize, i); - ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, bsize, - freeblks->fb_previousinum); - fs_pendingblocks += btodb(bsize); - blocksreleased += btodb(bsize); - } - } - UFS_LOCK(ump); - fs->fs_pendingblocks -= fs_pendingblocks; - UFS_UNLOCK(ump); - /* * If we still have not finished background cleanup, then check * to see if the block count needs to be adjusted. 
*/ - if (freeblks->fb_chkcnt != blocksreleased && - (fs->fs_flags & FS_UNCLEAN) != 0 && + if (freeblks->fb_chkcnt != 0 && (fs->fs_flags & FS_UNCLEAN) != 0 && ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_previousinum, - (flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ) - == 0) { + (flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ) == 0) { ip = VTOI(vp); - DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + \ - freeblks->fb_chkcnt - blocksreleased); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + freeblks->fb_chkcnt); ip->i_flag |= IN_CHANGE; vput(vp); } #ifdef INVARIANTS - if (freeblks->fb_chkcnt != blocksreleased && + if (freeblks->fb_chkcnt != 0 && ((fs->fs_flags & FS_UNCLEAN) == 0 || (flags & LK_NOWAIT) != 0)) printf("handle_workitem_freeblocks: block count\n"); - if (allerror) - softdep_error("handle_workitem_freeblks", allerror); #endif /* INVARIANTS */ ACQUIRE_LOCK(&lk); + /* + * All of the freeblock deps must be complete prior to this call + * so it's now safe to complete earlier outstanding journal entries. + */ + handle_jwork(&freeblks->fb_jwork); WORKITEM_FREE(freeblks, D_FREEBLKS); num_freeblkdep--; FREE_LOCK(&lk); @@ -2830,29 +5672,39 @@ handle_workitem_freeblocks(freeblks, flags) * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. */ -static int -indir_trunc(freeblks, dbn, level, lbn, countp) - struct freeblks *freeblks; +static void +indir_trunc(freework, dbn, lbn) + struct freework *freework; ufs2_daddr_t dbn; - int level; ufs_lbn_t lbn; - ufs2_daddr_t *countp; { + struct workhead wkhd; + struct jnewblk *jnewblk; + struct freeblks *freeblks; struct buf *bp; struct fs *fs; + struct worklist *wkn; struct worklist *wk; struct indirdep *indirdep; struct ufsmount *ump; ufs1_daddr_t *bap1 = 0; - ufs2_daddr_t nb, *bap2 = 0; + ufs2_daddr_t nb, nnb, *bap2 = 0; ufs_lbn_t lbnadd; int i, nblocks, ufs1fmt; - int error, allerror = 0; int fs_pendingblocks; + int freedeps; + int level; + int cnt; + LIST_INIT(&wkhd); + level = lbn_level(lbn); + if (level == -1) + panic("indir_trunc: Invalid lbn %jd\n", lbn); + freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; fs_pendingblocks = 0; + freedeps = 0; lbnadd = 1; for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); @@ -2877,13 +5729,14 @@ handle_workitem_freeblocks(freeblks, flags) ACQUIRE_LOCK(&lk); if (bp != NULL && (wk = LIST_FIRST(&bp->b_dep)) != NULL) { if (wk->wk_type != D_INDIRDEP || - (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp || - (indirdep->ir_state & GOINGAWAY) == 0) - panic("indir_trunc: lost indirdep"); - WORKLIST_REMOVE(wk); - WORKITEM_FREE(indirdep, D_INDIRDEP); + (wk->wk_state & GOINGAWAY) == 0) + panic("indir_trunc: lost indirdep %p", wk); + indirdep = WK_INDIRDEP(wk); + LIST_SWAP(&wkhd, &indirdep->ir_jwork, worklist, wk_list); + free_indirdep(indirdep); if (!LIST_EMPTY(&bp->b_dep)) - panic("indir_trunc: dangling dep"); + panic("indir_trunc: dangling dep %p", + LIST_FIRST(&bp->b_dep)); ump->um_numindirdeps -= 1; FREE_LOCK(&lk); } else { @@ -2892,11 +5745,10 @@ handle_workitem_freeblocks(freeblks, flags) brelse(bp); #endif FREE_LOCK(&lk); - error = bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, - NOCRED, &bp); - if (error) { + if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, + NOCRED, &bp) != 0) { brelse(bp); - return (error); + return; } } /* @@ -2909,57 +5761,245 @@ handle_workitem_freeblocks(freeblks, flags) ufs1fmt = 0; bap2 = (ufs2_daddr_t *)bp->b_data; } - nblocks = btodb(fs->fs_bsize); - for (i = NINDIR(fs) - 1; i >= 0; i--) { - if 
(ufs1fmt) + /* + * Reclaim indirect blocks which never made it to disk. + */ + cnt = 0; + LIST_FOREACH_SAFE(wk, &wkhd, wk_list, wkn) { + struct workhead freewk; + if (wk->wk_type != D_JNEWBLK) + continue; + WORKLIST_REMOVE_UNLOCKED(wk); + LIST_INIT(&freewk); + WORKLIST_INSERT_UNLOCKED(&freewk, wk); + jnewblk = WK_JNEWBLK(wk); + if (jnewblk->jn_lbn > 0) + i = (jnewblk->jn_lbn - -lbn) / lbnadd; + else + i = (jnewblk->jn_lbn - (lbn + 1)) / lbnadd; + KASSERT(i >= 0 && i < NINDIR(fs), + ("indir_trunc: Index out of range %d parent %jd lbn %jd", + i, lbn, jnewblk->jn_lbn)); + /* Clear the pointer so it isn't found below. */ + if (ufs1fmt) { nb = bap1[i]; - else + bap1[i] = 0; + } else { nb = bap2[i]; + bap2[i] = 0; + } + KASSERT(nb == jnewblk->jn_blkno, + ("indir_trunc: Block mismatch %jd != %jd", + nb, jnewblk->jn_blkno)); + ffs_blkfree(ump, fs, freeblks->fb_devvp, jnewblk->jn_blkno, + fs->fs_bsize, freeblks->fb_previousinum, &freewk); + cnt++; + } + ACQUIRE_LOCK(&lk); + freework->fw_ref += NINDIR(fs) + 1; + /* Any remaining journal work can be completed with freeblks. */ + jwork_move(&freeblks->fb_jwork, &wkhd); + FREE_LOCK(&lk); + nblocks = btodb(fs->fs_bsize); + if (ufs1fmt) + nb = bap1[0]; + else + nb = bap2[0]; + /* + * Reclaim on disk blocks. + */ + for (i = freework->fw_off; i < NINDIR(fs); i++, nb = nnb) { + if (i != NINDIR(fs) - 1) { + if (ufs1fmt) + nnb = bap1[i+1]; + else + nnb = bap2[i+1]; + } else + nnb = 0; if (nb == 0) continue; + cnt++; if (level != 0) { - if ((error = indir_trunc(freeblks, fsbtodb(fs, nb), - level - 1, lbn + (i * lbnadd), countp)) != 0) - allerror = error; + struct freework *nfreework; + ufs_lbn_t nlbn; + + nlbn = (lbn + 1) - (i * lbnadd); + nfreework = newfreework(freeblks, freework, nlbn, nb, + fs->fs_frag, 0); + freedeps++; + indir_trunc(nfreework, fsbtodb(fs, nb), nlbn); + } else { + struct freedep *freedep; + + /* + * Attempt to aggregate freedep dependencies for + * all blocks being released to the same CG. + */ + LIST_INIT(&wkhd); + if (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb))) { + freedep = newfreedep(freework); + WORKLIST_INSERT_UNLOCKED(&wkhd, + &freedep->fd_list); + freedeps++; + } + ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, + fs->fs_bsize, freeblks->fb_previousinum, &wkhd); + fs_pendingblocks += nblocks; } - ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, fs->fs_bsize, - freeblks->fb_previousinum); - fs_pendingblocks += nblocks; - *countp += nblocks; } - UFS_LOCK(ump); - fs->fs_pendingblocks -= fs_pendingblocks; - UFS_UNLOCK(ump); + ACQUIRE_LOCK(&lk); + freework->fw_off = i; + if (level == 0) + fs_pendingblocks = (nblocks * cnt); + freework->fw_ref += freedeps; + freework->fw_ref -= NINDIR(fs) + 1; + if (freework->fw_ref != 0) + freework = NULL; + FREE_LOCK(&lk); + if (fs_pendingblocks) { + ACQUIRE_LOCK(&lk); + freeblks->fb_chkcnt -= fs_pendingblocks; + FREE_LOCK(&lk); + UFS_LOCK(ump); + fs->fs_pendingblocks -= fs_pendingblocks; + UFS_UNLOCK(ump); + } bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); - return (allerror); + if (freework) + handle_workitem_indirblk(freework); + return; } /* - * Free an allocindir. - * This routine must be called with splbio interrupts blocked. + * Cancel an allocindir when it is removed via truncation. 
*/ static void -free_allocindir(aip, inodedep) +cancel_allocindir(aip, inodedep, freeblks) struct allocindir *aip; struct inodedep *inodedep; + struct freeblks *freeblks; { - struct freefrag *freefrag; + struct newblk *newblk; - mtx_assert(&lk, MA_OWNED); - if ((aip->ai_state & DEPCOMPLETE) == 0) - LIST_REMOVE(aip, ai_deps); - if (aip->ai_state & ONWORKLIST) - WORKLIST_REMOVE(&aip->ai_list); + /* + * If the journal hasn't been written the jnewblk must be passed + * to the call to ffs_freeblk that reclaims the space. We accomplish + * this by linking the journal dependency into the indirdep to be + * freed when indir_trunc() is called. If the journal has already + * been written we can simply reclaim the journal space when the + * freeblks work is complete. + */ LIST_REMOVE(aip, ai_next); - if ((freefrag = aip->ai_freefrag) != NULL) { + newblk = (struct newblk *)aip; + if (newblk->nb_jnewblk == NULL) + cancel_newblk(newblk, &freeblks->fb_jwork); + else + cancel_newblk(newblk, &aip->ai_indirdep->ir_jwork); + if (inodedep && inodedep->id_state & DEPCOMPLETE) + WORKLIST_INSERT(&inodedep->id_bufwait, &newblk->nb_list); + else + free_newblk(newblk); +} + +/* + * Create the mkdir dependencies for . and .. in a new directory. Link them + * in to a newdirblk so any subsequent additions are tracked properly. The + * caller is responsible for adding the mkdir1 dependency to the journal + * and updating id_mkdiradd. This function returns with lk held. + */ +static struct mkdir * +setup_newdir(dap, newinum, dinum, newdirbp, mkdirp) + struct diradd *dap; + ino_t newinum; + ino_t dinum; + struct buf *newdirbp; + struct mkdir **mkdirp; +{ + struct newblk *newblk; + struct pagedep *pagedep; + struct inodedep *inodedep; + struct newdirblk *newdirblk = 0; + struct mkdir *mkdir1, *mkdir2; + struct worklist *wk; + struct jaddref *jaddref; + struct mount *mp; + + mp = dap->da_list.wk_mp; + newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK, + M_SOFTDEP_FLAGS); + workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp); + LIST_INIT(&newdirblk->db_mkdir); + mkdir1 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS); + workitem_alloc(&mkdir1->md_list, D_MKDIR, mp); + mkdir1->md_state = ATTACHED | MKDIR_BODY; + mkdir1->md_diradd = dap; + mkdir1->md_jaddref = NULL; + mkdir2 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS); + workitem_alloc(&mkdir2->md_list, D_MKDIR, mp); + mkdir2->md_state = ATTACHED | MKDIR_PARENT; + mkdir2->md_diradd = dap; + mkdir2->md_jaddref = NULL; + if ((mp->mnt_flag & MNT_SUJ) == 0) { + mkdir1->md_state |= DEPCOMPLETE; + mkdir2->md_state |= DEPCOMPLETE; + } + /* + * Dependency on "." and ".." being written to disk. + */ + mkdir1->md_buf = newdirbp; + ACQUIRE_LOCK(&lk); + LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs); + /* + * We must link the pagedep, allocdirect, and newdirblk for + * the initial file page so the pointer to the new directory + * is not written until the directory contents are live and + * any subsequent additions are not marked live until the + * block is reachable via the inode. 
+ */ + if (pagedep_lookup(mp, newinum, 0, 0, &pagedep) == 0) + panic("setup_newdir: lost pagedep"); + LIST_FOREACH(wk, &newdirbp->b_dep, wk_list) + if (wk->wk_type == D_ALLOCDIRECT) + break; + if (wk == NULL) + panic("setup_newdir: lost allocdirect"); + newblk = WK_NEWBLK(wk); + pagedep->pd_state |= NEWBLOCK; + pagedep->pd_newdirblk = newdirblk; + newdirblk->db_pagedep = pagedep; + WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list); + WORKLIST_INSERT(&newdirblk->db_mkdir, &mkdir1->md_list); + /* + * Look up the inodedep for the parent directory so that we + * can link mkdir2 into the pending dotdot jaddref or + * the inode write if there is none. If the inode is + * ALLCOMPLETE and no jaddref is present all dependencies have + * been satisfied and mkdir2 can be freed. + */ + inodedep_lookup(mp, dinum, 0, &inodedep); + if (mp->mnt_flag & MNT_SUJ) { if (inodedep == NULL) - add_to_worklist(&freefrag->ff_list); - else - WORKLIST_INSERT(&inodedep->id_bufwait, - &freefrag->ff_list); + panic("setup_newdir: Lost parent."); + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == newinum && + (jaddref->ja_state & MKDIR_PARENT), + ("setup_newdir: bad dotdot jaddref %p", jaddref)); + LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); + mkdir2->md_jaddref = jaddref; + jaddref->ja_mkdir = mkdir2; + } else if (inodedep == NULL || + (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { + dap->da_state &= ~MKDIR_PARENT; + WORKITEM_FREE(mkdir2, D_MKDIR); + } else { + LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); + WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); } - WORKITEM_FREE(aip, D_ALLOCINDIR); + *mkdirp = mkdir2; + + return (mkdir1); } /* @@ -2998,12 +6038,14 @@ softdep_setup_directory_add(bp, dp, diroffset, new ufs_lbn_t lbn; /* block in directory containing new entry */ struct fs *fs; struct diradd *dap; - struct allocdirect *adp; + struct newblk *newblk; struct pagedep *pagedep; struct inodedep *inodedep; struct newdirblk *newdirblk = 0; struct mkdir *mkdir1, *mkdir2; + struct jaddref *jaddref; struct mount *mp; + int isindir; /* * Whiteouts have no dependencies. @@ -3013,6 +6055,8 @@ softdep_setup_directory_add(bp, dp, diroffset, new bdwrite(newdirbp); return (0); } + jaddref = NULL; + mkdir1 = mkdir2 = NULL; mp = UFSTOVFS(dp->i_ump); fs = dp->i_fs; lbn = lblkno(fs, diroffset); @@ -3023,111 +6067,123 @@ softdep_setup_directory_add(bp, dp, diroffset, new dap->da_offset = offset; dap->da_newinum = newinum; dap->da_state = ATTACHED; - if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) { + LIST_INIT(&dap->da_jwork); + isindir = bp->b_lblkno >= NDADDR; + if (isnewblk && + (isindir ? blkoff(fs, diroffset) : fragoff(fs, diroffset)) == 0) { newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK, M_SOFTDEP_FLAGS); workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp); + LIST_INIT(&newdirblk->db_mkdir); } + /* + * If we're creating a new directory setup the dependencies and set + * the dap state to wait for them. Otherwise it's COMPLETE and + * we can move on. 
+ */ if (newdirbp == NULL) { dap->da_state |= DEPCOMPLETE; ACQUIRE_LOCK(&lk); } else { dap->da_state |= MKDIR_BODY | MKDIR_PARENT; - mkdir1 = malloc(sizeof(struct mkdir), M_MKDIR, - M_SOFTDEP_FLAGS); - workitem_alloc(&mkdir1->md_list, D_MKDIR, mp); - mkdir1->md_state = MKDIR_BODY; - mkdir1->md_diradd = dap; - mkdir2 = malloc(sizeof(struct mkdir), M_MKDIR, - M_SOFTDEP_FLAGS); - workitem_alloc(&mkdir2->md_list, D_MKDIR, mp); - mkdir2->md_state = MKDIR_PARENT; - mkdir2->md_diradd = dap; - /* - * Dependency on "." and ".." being written to disk. - */ - mkdir1->md_buf = newdirbp; - ACQUIRE_LOCK(&lk); - LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs); - WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list); - FREE_LOCK(&lk); - bdwrite(newdirbp); - /* - * Dependency on link count increase for parent directory - */ - ACQUIRE_LOCK(&lk); - if (inodedep_lookup(mp, dp->i_number, 0, &inodedep) == 0 - || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { - dap->da_state &= ~MKDIR_PARENT; - WORKITEM_FREE(mkdir2, D_MKDIR); - } else { - LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); - WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); - } + mkdir1 = setup_newdir(dap, newinum, dp->i_number, newdirbp, + &mkdir2); } /* * Link into parent directory pagedep to await its being written. */ - if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) + if (pagedep_lookup(mp, dp->i_number, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); +#ifdef DEBUG + if (diradd_lookup(pagedep, offset) != NULL) + panic("softdep_setup_directory_add: %p already at off %d\n", + diradd_lookup(pagedep, offset), offset); +#endif dap->da_pagedep = pagedep; LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); + inodedep_lookup(mp, newinum, DEPALLOC, &inodedep); /* - * Link into its inodedep. Put it on the id_bufwait list if the inode - * is not yet written. If it is written, do the post-inode write - * processing to put it on the id_pendinghd list. + * If we're journaling, link the diradd into the jaddref so it + * may be completed after the journal entry is written. Otherwise, + * link the diradd into its inodedep. If the inode is not yet + * written place it on the bufwait list, otherwise do the post-inode + * write processing to put it on the id_pendinghd list. */ - (void) inodedep_lookup(mp, newinum, DEPALLOC, &inodedep); - if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) + if (mp->mnt_flag & MNT_SUJ) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, + ("softdep_setup_directory_add: bad jaddref %p", jaddref)); + jaddref->ja_diroff = diroffset; + jaddref->ja_diradd = dap; + add_to_journal(&jaddref->ja_list); + } else if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) diradd_inode_written(dap, inodedep); else WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); - if (isnewblk) { + /* + * Add the journal entries for . and .. links now that the primary + * link is written. 
+ */ + if (mkdir1 != NULL && mp->mnt_flag & MNT_SUJ) { + jaddref = (struct jaddref *)TAILQ_PREV(&jaddref->ja_ref, + inoreflst, if_deps); + KASSERT(jaddref != NULL && + jaddref->ja_ino == jaddref->ja_parent && + (jaddref->ja_state & MKDIR_BODY), + ("softdep_setup_directory_add: bad dot jaddref %p", + jaddref)); + mkdir1->md_jaddref = jaddref; + jaddref->ja_mkdir = mkdir1; /* - * Directories growing into indirect blocks are rare - * enough and the frequency of new block allocation - * in those cases even more rare, that we choose not - * to bother tracking them. Rather we simply force the - * new directory entry to disk. + * It is important that the dotdot journal entry + * is added prior to the dot entry since dot writes + * both the dot and dotdot links. These both must + * be added after the primary link for the journal + * to remain consistent. */ - if (lbn >= NDADDR) { - FREE_LOCK(&lk); - /* - * We only have a new allocation when at the - * beginning of a new block, not when we are - * expanding into an existing block. - */ - if (blkoff(fs, diroffset) == 0) - return (1); - return (0); - } + add_to_journal(&mkdir2->md_jaddref->ja_list); + add_to_journal(&jaddref->ja_list); + } + /* + * If we are adding a new directory remember this diradd so that if + * we rename it we can keep the dot and dotdot dependencies. If + * we are adding a new name for an inode that has a mkdiradd we + * must be in rename and we have to move the dot and dotdot + * dependencies to this new name. The old name is being orphaned + * soon. + */ + if (mkdir1 != NULL) { + if (inodedep->id_mkdiradd != NULL) + panic("softdep_setup_directory_add: Existing mkdir"); + inodedep->id_mkdiradd = dap; + } else if (inodedep->id_mkdiradd) + merge_diradd(inodedep, dap); + if (newdirblk) { /* - * We only have a new allocation when at the beginning - * of a new fragment, not when we are expanding into an - * existing fragment. Also, there is nothing to do if we - * are already tracking this block. + * There is nothing to do if we are already tracking + * this block. */ - if (fragoff(fs, diroffset) != 0) { - FREE_LOCK(&lk); - return (0); - } if ((pagedep->pd_state & NEWBLOCK) != 0) { WORKITEM_FREE(newdirblk, D_NEWDIRBLK); FREE_LOCK(&lk); return (0); } - /* - * Find our associated allocdirect and have it track us. - */ - if (inodedep_lookup(mp, dp->i_number, 0, &inodedep) == 0) - panic("softdep_setup_directory_add: lost inodedep"); - adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst); - if (adp == NULL || adp->ad_lbn != lbn) + if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk) + == 0) panic("softdep_setup_directory_add: lost entry"); + WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list); pagedep->pd_state |= NEWBLOCK; + pagedep->pd_newdirblk = newdirblk; newdirblk->db_pagedep = pagedep; - WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list); + FREE_LOCK(&lk); + /* + * If we extended into an indirect signal direnter to sync. + */ + if (isindir) + return (1); + return (0); } FREE_LOCK(&lk); return (0); @@ -3141,7 +6197,8 @@ softdep_setup_directory_add(bp, dp, diroffset, new * occur while the move is in progress. */ void -softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize) +softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize) + struct buf *bp; /* Buffer holding directory block. 
*/ struct inode *dp; /* inode for directory */ caddr_t base; /* address of dp->i_offset */ caddr_t oldloc; /* address of old directory location */ @@ -3150,40 +6207,204 @@ void { int offset, oldoffset, newoffset; struct pagedep *pagedep; + struct jmvref *jmvref; struct diradd *dap; + struct direct *de; + struct mount *mp; ufs_lbn_t lbn; + int flags; - ACQUIRE_LOCK(&lk); + mp = UFSTOVFS(dp->i_ump); + de = (struct direct *)oldloc; + jmvref = NULL; + flags = 0; + /* + * Moves are always journaled as it would be too complex to + * determine if any affected adds or removes are present in the + * journal. + */ + if (mp->mnt_flag & MNT_SUJ) { + flags = DEPALLOC; + jmvref = newjmvref(dp, de->d_ino, + dp->i_offset + (oldloc - base), + dp->i_offset + (newloc - base)); + } lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); - if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0) - goto done; oldoffset = offset + (oldloc - base); newoffset = offset + (newloc - base); + ACQUIRE_LOCK(&lk); + if (pagedep_lookup(mp, dp->i_number, lbn, flags, &pagedep) == 0) { + if (pagedep) + WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + goto done; + } + dap = diradd_lookup(pagedep, oldoffset); + if (dap) { + dap->da_offset = newoffset; + newoffset = DIRADDHASH(newoffset); + oldoffset = DIRADDHASH(oldoffset); + if ((dap->da_state & ALLCOMPLETE) != ALLCOMPLETE && + newoffset != oldoffset) { + LIST_REMOVE(dap, da_pdlist); + LIST_INSERT_HEAD(&pagedep->pd_diraddhd[newoffset], + dap, da_pdlist); + } + } +done: + if (jmvref) { + jmvref->jm_pagedep = pagedep; + LIST_INSERT_HEAD(&pagedep->pd_jmvrefhd, jmvref, jm_deps); + add_to_journal(&jmvref->jm_list); + } + bcopy(oldloc, newloc, entrysize); + FREE_LOCK(&lk); +} - LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], da_pdlist) { - if (dap->da_offset != oldoffset) - continue; - dap->da_offset = newoffset; - if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset)) - break; +/* + * Move the mkdir dependencies and journal work from one diradd to another + * when renaming a directory. The new name must depend on the mkdir deps + * completing as the old name did. Directories can only have one valid link + * at a time so one must be canonical. + */ +static void +merge_diradd(inodedep, newdap) + struct inodedep *inodedep; + struct diradd *newdap; +{ + struct diradd *olddap; + struct mkdir *mkdir, *nextmd; + short state; + + olddap = inodedep->id_mkdiradd; + inodedep->id_mkdiradd = newdap; + if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { + newdap->da_state &= ~DEPCOMPLETE; + for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) { + nextmd = LIST_NEXT(mkdir, md_mkdirs); + if (mkdir->md_diradd != olddap) + continue; + mkdir->md_diradd = newdap; + state = mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY); + newdap->da_state |= state; + olddap->da_state &= ~state; + if ((olddap->da_state & + (MKDIR_PARENT | MKDIR_BODY)) == 0) + break; + } + if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) + panic("merge_diradd: unfound ref"); + } + /* + * Any mkdir related journal items are not safe to be freed until + * the new name is stable. + */ + jwork_move(&newdap->da_jwork, &olddap->da_jwork); + olddap->da_state |= DEPCOMPLETE; + complete_diradd(olddap); +} + +/* + * Move the diradd to the pending list when all diradd dependencies are + * complete. 
+ */ +static void +complete_diradd(dap) + struct diradd *dap; +{ + struct pagedep *pagedep; + + if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { + if (dap->da_state & DIRCHG) + pagedep = dap->da_previous->dm_pagedep; + else + pagedep = dap->da_pagedep; LIST_REMOVE(dap, da_pdlist); - LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)], - dap, da_pdlist); - break; + LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } - if (dap == NULL) { +} - LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) { - if (dap->da_offset == oldoffset) { - dap->da_offset = newoffset; - break; +/* + * Cancel a diradd when a dirrem overlaps with it. We must cancel the journal + * add entries and conditonally journal the remove. + */ +static void +cancel_diradd(dap, dirrem, jremref, dotremref, dotdotremref) + struct diradd *dap; + struct dirrem *dirrem; + struct jremref *jremref; + struct jremref *dotremref; + struct jremref *dotdotremref; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct inoref *inoref; + struct mkdir *mkdir; + + /* + * If no remove references were allocated we're on a non-journaled + * filesystem and can skip the cancel step. + */ + if (jremref == NULL) { + free_diradd(dap, NULL); + return; + } + /* + * Cancel the primary name an free it if it does not require + * journaling. + */ + if (inodedep_lookup(dap->da_list.wk_mp, dap->da_newinum, + 0, &inodedep) != 0) { + /* Abort the addref that reference this diradd. */ + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if (inoref->if_list.wk_type != D_JADDREF) + continue; + jaddref = (struct jaddref *)inoref; + if (jaddref->ja_diradd != dap) + continue; + if (cancel_jaddref(jaddref, inodedep, + &dirrem->dm_jwork) == 0) { + free_jremref(jremref); + jremref = NULL; } + break; } } -done: - bcopy(oldloc, newloc, entrysize); - FREE_LOCK(&lk); + /* + * Cancel subordinate names and free them if they do not require + * journaling. + */ + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { + LIST_FOREACH(mkdir, &mkdirlisthd, md_mkdirs) { + if (mkdir->md_diradd != dap) + continue; + if ((jaddref = mkdir->md_jaddref) == NULL) + continue; + mkdir->md_jaddref = NULL; + if (mkdir->md_state & MKDIR_PARENT) { + if (cancel_jaddref(jaddref, NULL, + &dirrem->dm_jwork) == 0) { + free_jremref(dotdotremref); + dotdotremref = NULL; + } + } else { + if (cancel_jaddref(jaddref, inodedep, + &dirrem->dm_jwork) == 0) { + free_jremref(dotremref); + dotremref = NULL; + } + } + } + } + + if (jremref) + journal_jremref(dirrem, jremref, inodedep); + if (dotremref) + journal_jremref(dirrem, dotremref, inodedep); + if (dotdotremref) + journal_jremref(dirrem, dotdotremref, NULL); + jwork_move(&dirrem->dm_jwork, &dap->da_jwork); + free_diradd(dap, &dirrem->dm_jwork); } /* @@ -3191,8 +6412,9 @@ void * with splbio interrupts blocked. 
*/ static void -free_diradd(dap) +free_diradd(dap, wkhd) struct diradd *dap; + struct workhead *wkhd; { struct dirrem *dirrem; struct pagedep *pagedep; @@ -3200,32 +6422,48 @@ static void struct mkdir *mkdir, *nextmd; mtx_assert(&lk, MA_OWNED); - WORKLIST_REMOVE(&dap->da_list); LIST_REMOVE(dap, da_pdlist); + if (dap->da_state & ONWORKLIST) + WORKLIST_REMOVE(&dap->da_list); if ((dap->da_state & DIRCHG) == 0) { pagedep = dap->da_pagedep; } else { dirrem = dap->da_previous; pagedep = dirrem->dm_pagedep; dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + dirrem->dm_state |= COMPLETE; + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list, 0); } if (inodedep_lookup(pagedep->pd_list.wk_mp, dap->da_newinum, 0, &inodedep) != 0) - (void) free_inodedep(inodedep); + if (inodedep->id_mkdiradd == dap) + inodedep->id_mkdiradd = NULL; if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) { nextmd = LIST_NEXT(mkdir, md_mkdirs); if (mkdir->md_diradd != dap) continue; - dap->da_state &= ~mkdir->md_state; - WORKLIST_REMOVE(&mkdir->md_list); + dap->da_state &= + ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)); LIST_REMOVE(mkdir, md_mkdirs); + if (mkdir->md_state & ONWORKLIST) + WORKLIST_REMOVE(&mkdir->md_list); + if (mkdir->md_jaddref != NULL) + panic("free_diradd: Unexpected jaddref"); WORKITEM_FREE(mkdir, D_MKDIR); + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) + break; } if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) panic("free_diradd: unfound ref"); } + if (inodedep) + free_inodedep(inodedep); + /* + * Free any journal segments waiting for the directory write. + */ + handle_jwork(&dap->da_jwork); WORKITEM_FREE(dap, D_DIRADD); } @@ -3254,11 +6492,24 @@ softdep_setup_remove(bp, dp, ip, isrmdir) int isrmdir; /* indicates if doing RMDIR */ { struct dirrem *dirrem, *prevdirrem; + struct inodedep *inodedep; + int direct; /* - * Allocate a new dirrem if appropriate and ACQUIRE_LOCK. + * Allocate a new dirrem if appropriate and ACQUIRE_LOCK. We want + * newdirrem() to setup the full directory remove which requires + * isrmdir > 1. */ - dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem); + dirrem = newdirrem(bp, dp, ip, isrmdir?2:0, &prevdirrem); + /* + * Add the dirrem to the inodedep's pending remove list for quick + * discovery later. + */ + if (inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, + &inodedep) == 0) + panic("softdep_setup_remove: Lost inodedep."); + dirrem->dm_state |= ONDEPLIST; + LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); /* * If the COMPLETE flag is clear, then there were no active @@ -3280,12 +6531,148 @@ softdep_setup_remove(bp, dp, ip, isrmdir) LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, prevdirrem, dm_next); dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; + direct = LIST_EMPTY(&dirrem->dm_jremrefhd); FREE_LOCK(&lk); - handle_workitem_remove(dirrem, NULL); + if (direct) + handle_workitem_remove(dirrem, NULL); } } /* + * Check for an entry matching 'offset' on both the pd_dirraddhd list and the + * pd_pendinghd list of a pagedep. + */ +static struct diradd * +diradd_lookup(pagedep, offset) + struct pagedep *pagedep; + int offset; +{ + struct diradd *dap; + + LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist) + if (dap->da_offset == offset) + return (dap); + LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) + if (dap->da_offset == offset) + return (dap); + return (NULL); +} + +/* + * Search for a .. 
diradd dependency in a directory that is being removed. + * If the directory was renamed to a new parent we have a diradd rather + * than a mkdir for the .. entry. We need to cancel it now before + * it is found in truncate(). + */ +static struct jremref * +cancel_diradd_dotdot(ip, dirrem, jremref) + struct inode *ip; + struct dirrem *dirrem; + struct jremref *jremref; +{ + struct pagedep *pagedep; + struct diradd *dap; + struct worklist *wk; + + if (pagedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, 0, + &pagedep) == 0) + return (jremref); + dap = diradd_lookup(pagedep, DOTDOT_OFFSET); + if (dap == NULL) + return (jremref); + cancel_diradd(dap, dirrem, jremref, NULL, NULL); + /* + * Mark any journal work as belonging to the parent so it is freed + * with the .. reference. + */ + LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list) + wk->wk_state |= MKDIR_PARENT; + return (NULL); +} + +/* + * Cancel the MKDIR_PARENT mkdir component of a diradd when we're going to + * replace it with a dirrem/diradd pair as a result of re-parenting a + * directory. This ensures that we don't simultaneously have a mkdir and + * a diradd for the same .. entry. + */ +static struct jremref * +cancel_mkdir_dotdot(ip, dirrem, jremref) + struct inode *ip; + struct dirrem *dirrem; + struct jremref *jremref; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct mkdir *mkdir; + struct diradd *dap; + + if (inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, + &inodedep) == 0) + panic("cancel_mkdir_dotdot: Lost inodedep"); + dap = inodedep->id_mkdiradd; + if (dap == NULL || (dap->da_state & MKDIR_PARENT) == 0) + return (jremref); + for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; + mkdir = LIST_NEXT(mkdir, md_mkdirs)) + if (mkdir->md_diradd == dap && mkdir->md_state & MKDIR_PARENT) + break; + if (mkdir == NULL) + panic("cancel_mkdir_dotdot: Unable to find mkdir\n"); + if ((jaddref = mkdir->md_jaddref) != NULL) { + mkdir->md_jaddref = NULL; + if (inodedep_lookup(UFSTOVFS(ip->i_ump), jaddref->ja_ino, 0, + &inodedep) == 0) + panic("cancel_mkdir_dotdot: Lost parent inodedep"); + if (cancel_jaddref(jaddref, inodedep, &dirrem->dm_jwork)) { + journal_jremref(dirrem, jremref, inodedep); + jremref = NULL; + } + } + if (mkdir->md_state & ONWORKLIST) + WORKLIST_REMOVE(&mkdir->md_list); + mkdir->md_state |= ALLCOMPLETE; + complete_mkdir(mkdir); + return (jremref); +} + +static void +journal_jremref(dirrem, jremref, inodedep) + struct dirrem *dirrem; + struct jremref *jremref; + struct inodedep *inodedep; +{ + + if (inodedep == NULL) + if (inodedep_lookup(jremref->jr_list.wk_mp, + jremref->jr_ref.if_ino, 0, &inodedep) == 0) + panic("journal_jremref: Lost inodedep"); + LIST_INSERT_HEAD(&dirrem->dm_jremrefhd, jremref, jr_deps); + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jremref->jr_ref, if_deps); + add_to_journal(&jremref->jr_list); +} + +static void +dirrem_journal(dirrem, jremref, dotremref, dotdotremref) + struct dirrem *dirrem; + struct jremref *jremref; + struct jremref *dotremref; + struct jremref *dotdotremref; +{ + struct inodedep *inodedep; + + + if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino, 0, + &inodedep) == 0) + panic("dirrem_journal: Lost inodedep"); + journal_jremref(dirrem, jremref, inodedep); + if (dotremref) + journal_jremref(dirrem, dotremref, inodedep); + if (dotdotremref) + journal_jremref(dirrem, dotdotremref, NULL); +} + +/* * Allocate a new dirrem if appropriate and return it along with * its associated pagedep. Called without a lock, returns with lock. 
*/ @@ -3303,12 +6690,17 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) struct diradd *dap; struct dirrem *dirrem; struct pagedep *pagedep; + struct jremref *jremref; + struct jremref *dotremref; + struct jremref *dotdotremref; + struct vnode *dvp; /* * Whiteouts have no deletion dependencies. */ if (ip == NULL) panic("newdirrem: whiteout"); + dvp = ITOV(dp); /* * If we are over our limit, try to improve the situation. * Limiting the number of dirrem structures will also limit @@ -3321,34 +6713,75 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) FREE_LOCK(&lk); dirrem = malloc(sizeof(struct dirrem), M_DIRREM, M_SOFTDEP_FLAGS|M_ZERO); - workitem_alloc(&dirrem->dm_list, D_DIRREM, ITOV(dp)->v_mount); + workitem_alloc(&dirrem->dm_list, D_DIRREM, dvp->v_mount); + LIST_INIT(&dirrem->dm_jremrefhd); + LIST_INIT(&dirrem->dm_jwork); dirrem->dm_state = isrmdir ? RMDIR : 0; dirrem->dm_oldinum = ip->i_number; *prevdirremp = NULL; - + /* + * Allocate remove reference structures to track journal write + * dependencies. We will always have one for the link and + * when doing directories we will always have one more for dot. + * When renaming a directory we skip the dotdot link change so + * this is not needed. + */ + jremref = dotremref = dotdotremref = NULL; + if (DOINGSUJ(dvp)) { + if (isrmdir) { + jremref = newjremref(dirrem, dp, ip, dp->i_offset, + ip->i_effnlink + 2); + dotremref = newjremref(dirrem, ip, ip, DOT_OFFSET, + ip->i_effnlink + 1); + } else + jremref = newjremref(dirrem, dp, ip, dp->i_offset, + ip->i_effnlink + 1); + if (isrmdir > 1) { + dotdotremref = newjremref(dirrem, ip, dp, DOTDOT_OFFSET, + dp->i_effnlink + 1); + dotdotremref->jr_state |= MKDIR_PARENT; + } + } ACQUIRE_LOCK(&lk); lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); - if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) + if (pagedep_lookup(UFSTOVFS(dp->i_ump), dp->i_number, lbn, DEPALLOC, + &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); dirrem->dm_pagedep = pagedep; /* + * If we're renaming a .. link to a new directory, cancel any + * existing MKDIR_PARENT mkdir. If it has already been canceled + * the jremref is preserved for any potential diradd in this + * location. This can not coincide with a rmdir. + */ + if (dp->i_offset == DOTDOT_OFFSET) { + if (isrmdir) + panic("newdirrem: .. directory change during remove?"); + jremref = cancel_mkdir_dotdot(dp, dirrem, jremref); + } + /* + * If we're removing a directory search for the .. dependency now and + * cancel it. Any pending journal work will be added to the dirrem + * to be completed when the workitem remove completes. + */ + if (isrmdir > 1) + dotdotremref = cancel_diradd_dotdot(ip, dirrem, dotdotremref); + /* * Check for a diradd dependency for the same directory entry. * If present, then both dependencies become obsolete and can - * be de-allocated. Check for an entry on both the pd_dirraddhd - * list and the pd_pendinghd list. + * be de-allocated. */ - - LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist) - if (dap->da_offset == offset) - break; + dap = diradd_lookup(pagedep, offset); if (dap == NULL) { - - LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) - if (dap->da_offset == offset) - break; - if (dap == NULL) - return (dirrem); + /* + * Link the jremref structures into the dirrem so they are + * written prior to the pagedep. + */ + if (jremref) + dirrem_journal(dirrem, jremref, dotremref, + dotdotremref); + return (dirrem); } /* * Must be ATTACHED at this point. 
@@ -3373,7 +6806,17 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) * Mark it COMPLETE so we can delete its inode immediately. */ dirrem->dm_state |= COMPLETE; - free_diradd(dap); + cancel_diradd(dap, dirrem, jremref, dotremref, dotdotremref); +#ifdef SUJ_DEBUG + if (isrmdir == 0) { + struct worklist *wk; + + LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list) + if (wk->wk_state & (MKDIR_BODY | MKDIR_PARENT)) + panic("bad wk %p (0x%X)\n", wk, wk->wk_state); + } +#endif + return (dirrem); } @@ -3407,6 +6850,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum struct dirrem *dirrem, *prevdirrem; struct pagedep *pagedep; struct inodedep *inodedep; + struct jaddref *jaddref; struct mount *mp; offset = blkoff(dp->i_fs, dp->i_offset); @@ -3422,6 +6866,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE; dap->da_offset = offset; dap->da_newinum = newinum; + LIST_INIT(&dap->da_jwork); } /* @@ -3454,11 +6899,21 @@ softdep_setup_directory_change(bp, dp, ip, newinum dm_next); } else { dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list, 0); } FREE_LOCK(&lk); return; } + /* + * Add the dirrem to the inodedep's pending remove list for quick + * discovery later. A valid nlinkdelta ensures that this lookup + * will not fail. + */ + if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) + panic("softdep_setup_directory_change: Lost inodedep."); + dirrem->dm_state |= ONDEPLIST; + LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); /* * If the COMPLETE flag is clear, then there were no active @@ -3483,15 +6938,29 @@ softdep_setup_directory_change(bp, dp, ip, newinum dap->da_pagedep = pagedep; } dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list, 0); } /* - * Link into its inodedep. Put it on the id_bufwait list if the inode + * Lookup the jaddref for this journal entry. We must finish + * initializing it and make the diradd write dependent on it. + * If we're not journaling Put it on the id_bufwait list if the inode * is not yet written. If it is written, do the post-inode write * processing to put it on the id_pendinghd list. */ - if (inodedep_lookup(mp, newinum, DEPALLOC, &inodedep) == 0 || - (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { + inodedep_lookup(mp, newinum, DEPALLOC, &inodedep); + if (mp->mnt_flag & MNT_SUJ) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, + ("softdep_setup_directory_change: bad jaddref %p", + jaddref)); + jaddref->ja_diroff = dp->i_offset; + jaddref->ja_diradd = dap; + LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], + dap, da_pdlist); + add_to_journal(&jaddref->ja_list); + } else if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state |= COMPLETE; LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); @@ -3500,6 +6969,13 @@ softdep_setup_directory_change(bp, dp, ip, newinum dap, da_pdlist); WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); } + /* + * If we're making a new name for a directory that has not been + * committed when need to move the dot and dotdot references to + * this new name. 
+ */ + if (inodedep->id_mkdiradd && dp->i_offset != DOTDOT_OFFSET) + merge_diradd(inodedep, dap); FREE_LOCK(&lk); } @@ -3516,8 +6992,7 @@ softdep_change_linkcnt(ip) struct inodedep *inodedep; ACQUIRE_LOCK(&lk); - (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, - DEPALLOC, &inodedep); + inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC, &inodedep); if (ip->i_nlink < ip->i_effnlink) panic("softdep_change_linkcnt: bad delta"); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; @@ -3574,6 +7049,304 @@ softdep_releasefile(ip) } /* + * Attach a sbdep dependency to the superblock buf so that we can keep + * track of the head of the linked list of referenced but unlinked inodes. + */ +void +softdep_setup_sbupdate(ump, fs, bp) + struct ufsmount *ump; + struct fs *fs; + struct buf *bp; +{ + struct sbdep *sbdep; + struct worklist *wk; + + if ((fs->fs_flags & FS_SUJ) == 0) + return; + LIST_FOREACH(wk, &bp->b_dep, wk_list) + if (wk->wk_type == D_SBDEP) + break; + if (wk != NULL) + return; + sbdep = malloc(sizeof(struct sbdep), M_SBDEP, M_SOFTDEP_FLAGS); + workitem_alloc(&sbdep->sb_list, D_SBDEP, UFSTOVFS(ump)); + sbdep->sb_fs = fs; + sbdep->sb_ump = ump; + ACQUIRE_LOCK(&lk); + WORKLIST_INSERT(&bp->b_dep, &sbdep->sb_list); + FREE_LOCK(&lk); +} + +/* + * Return the first unlinked inodedep which is ready to be the head of the + * list. The inodedep and all those after it must have valid next pointers. + */ +static struct inodedep * +first_unlinked_inodedep(ump) + struct ufsmount *ump; +{ + struct inodedep *inodedep; + struct inodedep *idp; + + for (inodedep = TAILQ_LAST(&ump->softdep_unlinked, inodedeplst); + inodedep; inodedep = idp) { + if ((inodedep->id_state & UNLINKNEXT) == 0) + return (NULL); + idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); + if (idp == NULL || (idp->id_state & UNLINKNEXT) == 0) + break; + if ((inodedep->id_state & UNLINKPREV) == 0) + panic("first_unlinked_inodedep: prev != next"); + } + if (inodedep == NULL) + return (NULL); + + return (inodedep); +} + +/* + * Set the sujfree unlinked head pointer prior to writing a superblock. + */ +static void +initiate_write_sbdep(sbdep) + struct sbdep *sbdep; +{ + struct inodedep *inodedep; + struct fs *bpfs; + struct fs *fs; + + bpfs = sbdep->sb_fs; + fs = sbdep->sb_ump->um_fs; + inodedep = first_unlinked_inodedep(sbdep->sb_ump); + if (inodedep) { + fs->fs_sujfree = inodedep->id_ino; + inodedep->id_state |= UNLINKPREV; + } else + fs->fs_sujfree = 0; + bpfs->fs_sujfree = fs->fs_sujfree; +} + +/* + * After a superblock is written determine whether it must be written again + * due to a changing unlinked list head. + */ +static int +handle_written_sbdep(sbdep, bp) + struct sbdep *sbdep; + struct buf *bp; +{ + struct inodedep *inodedep; + struct mount *mp; + struct fs *fs; + + fs = sbdep->sb_fs; + mp = UFSTOVFS(sbdep->sb_ump); + inodedep = first_unlinked_inodedep(sbdep->sb_ump); + if ((inodedep && fs->fs_sujfree != inodedep->id_ino) || + (inodedep == NULL && fs->fs_sujfree != 0)) { + bdirty(bp); + return (1); + } + WORKITEM_FREE(sbdep, D_SBDEP); + if (fs->fs_sujfree == 0) + return (0); + if (inodedep_lookup(mp, fs->fs_sujfree, 0, &inodedep) == 0) + panic("handle_written_sbdep: lost inodedep"); + /* + * Now that we have a record of this indode in stable store we can + * discard any pending work. 
+ */ + for (; inodedep != NULL; inodedep = TAILQ_NEXT(inodedep, id_unlinked)) { + if ((inodedep->id_state & UNLINKLINKS) != UNLINKLINKS) + panic("handle_written_sbdep: Bad inodedep %p (0x%X)", + inodedep, inodedep->id_state); + if (handle_bufwait(inodedep, NULL) != NULL) + panic("handle_written_sbdep: freefile on " + "unlinked inodedep"); + } + + return (0); +} + +/* + * Mark an inodedep has unlinked and insert it into the in-memory unlinked + * list. + */ +static void +unlinked_inodedep(mp, inodedep) + struct mount *mp; + struct inodedep *inodedep; +{ + struct ufsmount *ump; + + if ((mp->mnt_flag & MNT_SUJ) == 0) + return; + ump = VFSTOUFS(mp); + ump->um_fs->fs_fmod = 1; + inodedep->id_state |= UNLINKED; + TAILQ_INSERT_HEAD(&ump->softdep_unlinked, inodedep, id_unlinked); +} + +/* + * Remove an inodedep from the unlinked inodedep list. This may require + * disk writes if the inode has made it that far. + */ +static void +clear_unlinked_inodedep(inodedep) + struct inodedep *inodedep; +{ + struct ufsmount *ump; + struct inodedep *idp; + struct inodedep *idn; + struct fs *fs; + struct buf *bp; + ino_t ino; + ino_t nino; + ino_t pino; + int error; + + ump = VFSTOUFS(inodedep->id_list.wk_mp); + fs = ump->um_fs; + ino = inodedep->id_ino; + error = 0; + for (;;) { + /* + * If nothing has yet been written simply remove us from + * the in memory list and return. This is the most common + * case where handle_workitem_remove() loses the final + * reference. + */ + if ((inodedep->id_state & UNLINKLINKS) == 0) + break; + /* + * If we have a NEXT pointer and no PREV pointer we can simply + * clear NEXT's PREV and remove ourselves from the list. Be + * careful not to clear PREV if the superblock points at + * next as well. + */ + idn = TAILQ_NEXT(inodedep, id_unlinked); + if ((inodedep->id_state & UNLINKLINKS) == UNLINKNEXT) { + if (idn && fs->fs_sujfree != idn->id_ino) + idn->id_state &= ~UNLINKPREV; + break; + } + /* + * Here we have an inodedep which is actually linked into + * the list. We must remove it by forcing a write to the + * link before us, whether it be the superblock or an inode. + * Unfortunately the list may change while we're waiting + * on the buf lock for either resource so we must loop until + * we lock. the right one. If both the superblock and an + * inode point to this inode we must clear the inode first + * followed by the superblock. + */ + idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); + pino = 0; + if (idp && (idp->id_state & UNLINKNEXT)) + pino = idp->id_ino; + FREE_LOCK(&lk); + if (pino == 0) + bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), + (int)fs->fs_sbsize, 0, 0, 0); + else + error = bread(ump->um_devvp, + fsbtodb(fs, ino_to_fsba(fs, pino)), + (int)fs->fs_bsize, NOCRED, &bp); + ACQUIRE_LOCK(&lk); + if (error) + break; + /* If the list has changed restart the loop. */ + idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); + nino = 0; + if (idp && (idp->id_state & UNLINKNEXT)) + nino = idp->id_ino; + if (nino != pino || + (inodedep->id_state & UNLINKPREV) != UNLINKPREV) { + FREE_LOCK(&lk); + brelse(bp); + ACQUIRE_LOCK(&lk); + continue; + } + /* + * Remove us from the in memory list. After this we cannot + * access the inodedep. + */ + idn = TAILQ_NEXT(inodedep, id_unlinked); + inodedep->id_state &= ~(UNLINKED | UNLINKLINKS); + TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked); + /* + * Determine the next inode number. 
+ */ + nino = 0; + if (idn) { + /* + * If next isn't on the list we can just clear prev's + * state and schedule it to be fixed later. No need + * to synchronously write if we're not in the real + * list. + */ + if ((idn->id_state & UNLINKPREV) == 0 && pino != 0) { + idp->id_state &= ~UNLINKNEXT; + if ((idp->id_state & ONWORKLIST) == 0) + WORKLIST_INSERT(&bp->b_dep, + &idp->id_list); + FREE_LOCK(&lk); + bawrite(bp); + ACQUIRE_LOCK(&lk); + return; + } + nino = idn->id_ino; + } + FREE_LOCK(&lk); + /* + * The predecessor's next pointer is manually updated here + * so that the NEXT flag is never cleared for an element + * that is in the list. + */ + if (pino == 0) { + bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + ffs_oldfscompat_write((struct fs *)bp->b_data, ump); + softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, + bp); + } else if (fs->fs_magic == FS_UFS1_MAGIC) + ((struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, pino))->di_freelink = nino; + else + ((struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, pino))->di_freelink = nino; + /* + * If the bwrite fails we have no recourse to recover. The + * filesystem is corrupted already. + */ + bwrite(bp); + ACQUIRE_LOCK(&lk); + /* + * If the superblock pointer still needs to be cleared force + * a write here. + */ + if (fs->fs_sujfree == ino) { + FREE_LOCK(&lk); + bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), + (int)fs->fs_sbsize, 0, 0, 0); + bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + ffs_oldfscompat_write((struct fs *)bp->b_data, ump); + softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, + bp); + bwrite(bp); + ACQUIRE_LOCK(&lk); + } + if (fs->fs_sujfree != ino) + return; + panic("clear_unlinked_inodedep: Failed to clear free head"); + } + if (inodedep->id_ino == fs->fs_sujfree) + panic("clear_unlinked_inodedep: Freeing head of free list"); + inodedep->id_state &= ~(UNLINKED | UNLINKLINKS); + TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked); + return; +} + +/* * This workitem decrements the inode's link count. * If the link count reaches zero, the file is removed. */ @@ -3584,23 +7357,55 @@ handle_workitem_remove(dirrem, xp) { struct thread *td = curthread; struct inodedep *inodedep; + struct workhead dotdotwk; + struct worklist *wk; + struct ufsmount *ump; + struct mount *mp; struct vnode *vp; struct inode *ip; ino_t oldinum; int error; + if (dirrem->dm_state & ONWORKLIST) + panic("handle_workitem_remove: dirrem %p still on worklist", + dirrem); + oldinum = dirrem->dm_oldinum; + mp = dirrem->dm_list.wk_mp; + ump = VFSTOUFS(mp); if ((vp = xp) == NULL && - (error = ffs_vgetf(dirrem->dm_list.wk_mp, - dirrem->dm_oldinum, LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ)) != 0) { + (error = ffs_vgetf(mp, oldinum, LK_EXCLUSIVE, &vp, + FFSV_FORCEINSMQ)) != 0) { softdep_error("handle_workitem_remove: vget", error); return; } ip = VTOI(vp); ACQUIRE_LOCK(&lk); - if ((inodedep_lookup(dirrem->dm_list.wk_mp, - dirrem->dm_oldinum, 0, &inodedep)) == 0) + if ((inodedep_lookup(mp, oldinum, 0, &inodedep)) == 0) panic("handle_workitem_remove: lost inodedep"); + if (dirrem->dm_state & ONDEPLIST) + LIST_REMOVE(dirrem, dm_inonext); + KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd), + ("handle_workitem_remove: Journal entries not written.")); + /* + * Move all dependencies waiting on the remove to complete + * from the dirrem to the inode inowait list to be completed + * after the inode has been updated and written to disk. Any + * marked MKDIR_PARENT are saved to be completed when the .. ref + * is removed. 
+ */ + LIST_INIT(&dotdotwk); + while ((wk = LIST_FIRST(&dirrem->dm_jwork)) != NULL) { + WORKLIST_REMOVE(wk); + if (wk->wk_state & MKDIR_PARENT) { + wk->wk_state &= ~MKDIR_PARENT; + WORKLIST_INSERT(&dotdotwk, wk); + continue; + } + WORKLIST_INSERT(&inodedep->id_inowait, wk); + } + LIST_SWAP(&dirrem->dm_jwork, &dotdotwk, worklist, wk_list); + /* * Normal file deletion. */ if ((dirrem->dm_state & RMDIR) == 0) { @@ -3609,12 +7414,16 @@ handle_workitem_remove(dirrem, xp) ip->i_flag |= IN_CHANGE; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad file delta"); + if (ip->i_nlink == 0) + unlinked_inodedep(mp, inodedep); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; num_dirrem -= 1; + KASSERT(LIST_EMPTY(&dirrem->dm_jwork), + ("handle_workitem_remove: worklist not empty. %s", + TYPENAME(LIST_FIRST(&dirrem->dm_jwork)->wk_type))); WORKITEM_FREE(dirrem, D_DIRREM); FREE_LOCK(&lk); - vput(vp); - return; + goto out; } /* * Directory deletion. Decrement reference count for both the @@ -3628,6 +7437,8 @@ handle_workitem_remove(dirrem, xp) ip->i_flag |= IN_CHANGE; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad dir delta"); + if (ip->i_nlink == 0) + unlinked_inodedep(mp, inodedep); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; FREE_LOCK(&lk); if ((error = ffs_truncate(vp, (off_t)0, 0, td->td_ucred, td)) != 0) @@ -3639,36 +7450,47 @@ handle_workitem_remove(dirrem, xp) * directory should not change. Thus we skip the followup dirrem. */ if (dirrem->dm_state & DIRCHG) { + KASSERT(LIST_EMPTY(&dirrem->dm_jwork), + ("handle_workitem_remove: DIRCHG and worklist not empty.")); num_dirrem -= 1; WORKITEM_FREE(dirrem, D_DIRREM); FREE_LOCK(&lk); - vput(vp); - return; + goto out; } + dirrem->dm_state = ONDEPLIST; + dirrem->dm_oldinum = dirrem->dm_dirinum; /* - * If the inodedep does not exist, then the zero'ed inode has - * been written to disk. If the allocated inode has never been - * written to disk, then the on-disk inode is zero'ed. In either - * case we can remove the file immediately. + * Place the dirrem on the parent's diremhd list. */ - dirrem->dm_state = 0; - oldinum = dirrem->dm_oldinum; - dirrem->dm_oldinum = dirrem->dm_dirinum; - if (inodedep_lookup(dirrem->dm_list.wk_mp, oldinum, - 0, &inodedep) == 0 || check_inode_unwritten(inodedep)) { + if (inodedep_lookup(mp, dirrem->dm_oldinum, 0, &inodedep) == 0) + panic("handle_workitem_remove: lost dir inodedep"); + LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); + /* + * If the allocated inode has never been written to disk, then + * the on-disk inode is zero'ed and we can remove the file + * immediately. When journaling if the inode has been marked + * unlinked and not DEPCOMPLETE we know it can never be written. 
+ */ + inodedep_lookup(mp, oldinum, 0, &inodedep); + if (inodedep == NULL || + (inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED || + check_inode_unwritten(inodedep)) { if (xp != NULL) - add_to_worklist(&dirrem->dm_list); + add_to_worklist(&dirrem->dm_list, 0); FREE_LOCK(&lk); - vput(vp); - if (xp == NULL) + if (xp == NULL) { + vput(vp); handle_workitem_remove(dirrem, NULL); + } return; } WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); FREE_LOCK(&lk); ip->i_flag |= IN_CHANGE; +out: ffs_update(vp, 0); - vput(vp); + if (xp == NULL) + vput(vp); } /* @@ -3689,6 +7511,7 @@ static void handle_workitem_freefile(freefile) struct freefile *freefile; { + struct workhead wkhd; struct fs *fs; struct inodedep *idp; struct ufsmount *ump; @@ -3701,13 +7524,15 @@ handle_workitem_freefile(freefile) error = inodedep_lookup(UFSTOVFS(ump), freefile->fx_oldinum, 0, &idp); FREE_LOCK(&lk); if (error) - panic("handle_workitem_freefile: inodedep survived"); + panic("handle_workitem_freefile: inodedep %p survived", idp); #endif UFS_LOCK(ump); fs->fs_pendinginodes -= 1; UFS_UNLOCK(ump); + LIST_INIT(&wkhd); + LIST_SWAP(&freefile->fx_jwork, &wkhd, worklist, wk_list); if ((error = ffs_freefile(ump, fs, freefile->fx_devvp, - freefile->fx_oldinum, freefile->fx_mode)) != 0) + freefile->fx_oldinum, freefile->fx_mode, &wkhd)) != 0) softdep_error("handle_workitem_freefile", error); ACQUIRE_LOCK(&lk); WORKITEM_FREE(freefile, D_FREEFILE); @@ -3757,8 +7582,10 @@ softdep_disk_io_initiation(bp) { struct worklist *wk; struct worklist marker; - struct indirdep *indirdep; struct inodedep *inodedep; + struct freeblks *freeblks; + struct jfreeblk *jfreeblk; + struct newblk *newblk; /* * We only care about write operations. There should never @@ -3767,6 +7594,10 @@ softdep_disk_io_initiation(bp) if (bp->b_iocmd != BIO_WRITE) panic("softdep_disk_io_initiation: not write"); + if (bp->b_vflags & BV_BKGRDINPROG) + panic("softdep_disk_io_initiation: Writing buffer with " + "background write in progress: %p", bp); + marker.wk_type = D_LAST + 1; /* Not a normal workitem */ PHOLD(curproc); /* Don't swap out kernel stack */ @@ -3792,46 +7623,58 @@ softdep_disk_io_initiation(bp) continue; case D_INDIRDEP: - indirdep = WK_INDIRDEP(wk); - if (indirdep->ir_state & GOINGAWAY) - panic("disk_io_initiation: indirdep gone"); + initiate_write_indirdep(WK_INDIRDEP(wk), bp); + continue; + + case D_BMSAFEMAP: + initiate_write_bmsafemap(WK_BMSAFEMAP(wk), bp); + continue; + + case D_JSEG: + WK_JSEG(wk)->js_buf = NULL; + continue; + + case D_FREEBLKS: + freeblks = WK_FREEBLKS(wk); + jfreeblk = LIST_FIRST(&freeblks->fb_jfreeblkhd); /* - * If there are no remaining dependencies, this - * will be writing the real pointers, so the - * dependency can be freed. + * We have to wait for the jfreeblks to be journaled + * before we can write an inodeblock with updated + * pointers. Be careful to arrange the marker so + * we revisit the jfreeblk if it's not removed by + * the first jwait(). 
*/ - if (LIST_EMPTY(&indirdep->ir_deplisthd)) { - struct buf *bp; - - bp = indirdep->ir_savebp; - bp->b_flags |= B_INVAL | B_NOCACHE; - /* inline expand WORKLIST_REMOVE(wk); */ - wk->wk_state &= ~ONWORKLIST; - LIST_REMOVE(wk, wk_list); - WORKITEM_FREE(indirdep, D_INDIRDEP); - FREE_LOCK(&lk); - brelse(bp); - ACQUIRE_LOCK(&lk); - continue; + if (jfreeblk != NULL) { + LIST_REMOVE(&marker, wk_list); + LIST_INSERT_BEFORE(wk, &marker, wk_list); + jwait(&jfreeblk->jf_list); } + continue; + case D_ALLOCDIRECT: + case D_ALLOCINDIR: /* - * Replace up-to-date version with safe version. + * We have to wait for the jnewblk to be journaled + * before we can write to a block otherwise the + * contents may be confused with an earlier file + * at recovery time. Handle the marker as described + * above. */ - FREE_LOCK(&lk); - indirdep->ir_saveddata = malloc(bp->b_bcount, - M_INDIRDEP, M_SOFTDEP_FLAGS); - ACQUIRE_LOCK(&lk); - indirdep->ir_state &= ~ATTACHED; - indirdep->ir_state |= UNDONE; - bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); - bcopy(indirdep->ir_savebp->b_data, bp->b_data, - bp->b_bcount); + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk != NULL) { + LIST_REMOVE(&marker, wk_list); + LIST_INSERT_BEFORE(wk, &marker, wk_list); + jwait(&newblk->nb_jnewblk->jn_list); + } continue; + case D_SBDEP: + initiate_write_sbdep(WK_SBDEP(wk)); + continue; + case D_MKDIR: - case D_BMSAFEMAP: - case D_ALLOCDIRECT: - case D_ALLOCINDIR: + case D_FREEWORK: + case D_FREEDEP: + case D_JSEGDEP: continue; default: @@ -3855,6 +7698,9 @@ initiate_write_filepage(pagedep, bp) struct pagedep *pagedep; struct buf *bp; { + struct jremref *jremref; + struct jmvref *jmvref; + struct dirrem *dirrem; struct diradd *dap; struct direct *ep; int i; @@ -3869,6 +7715,18 @@ initiate_write_filepage(pagedep, bp) return; } pagedep->pd_state |= IOSTARTED; + /* + * Wait for all journal remove dependencies to hit the disk. + * We can not allow any potentially conflicting directory adds + * to be visible before removes and rollback is too difficult. + * lk may be dropped and re-acquired, however we hold the buf + * locked so the dependency can not go away. + */ + LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) + while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) + jwait(&jremref->jr_list); + while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) + jwait(&jmvref->jm_list); for (i = 0; i < DAHASHSZ; i++) { LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { ep = (struct direct *) @@ -3905,6 +7763,7 @@ initiate_write_inodeblock_ufs1(inodedep, bp) struct allocdirect *adp, *lastadp; struct ufs1_dinode *dp; struct ufs1_dinode *sip; + struct inoref *inoref; struct fs *fs; ufs_lbn_t i; #ifdef INVARIANTS @@ -3918,7 +7777,21 @@ initiate_write_inodeblock_ufs1(inodedep, bp) fs = inodedep->id_fs; dp = (struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, inodedep->id_ino); + /* + * If we're on the unlinked list but have not yet written our + * next pointer initialize it here. + */ + if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) { + struct inodedep *inon; + + inon = TAILQ_NEXT(inodedep, id_unlinked); + if (inon) + dp->di_freelink = inon->id_ino; + else + dp->di_freelink = 0; + } + /* * If the bitmap is not yet written, then the allocated * inode cannot be written to disk. 
*/ @@ -3933,6 +7806,7 @@ initiate_write_inodeblock_ufs1(inodedep, bp) *inodedep->id_savedino1 = *dp; bzero((caddr_t)dp, sizeof(struct ufs1_dinode)); dp->di_gen = inodedep->id_savedino1->di_gen; + dp->di_freelink = inodedep->id_savedino1->di_freelink; return; } /* @@ -3940,32 +7814,40 @@ initiate_write_inodeblock_ufs1(inodedep, bp) */ inodedep->id_savedsize = dp->di_size; inodedep->id_savedextsize = 0; - if (TAILQ_EMPTY(&inodedep->id_inoupdt)) + inodedep->id_savednlink = dp->di_nlink; + if (TAILQ_EMPTY(&inodedep->id_inoupdt) && + TAILQ_EMPTY(&inodedep->id_inoreflst)) return; /* + * Revert the link count to that of the first unwritten journal entry. + */ + inoref = TAILQ_FIRST(&inodedep->id_inoreflst); + if (inoref) + dp->di_nlink = inoref->if_nlink; + /* * Set the dependencies to busy. */ for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS - if (deplist != 0 && prevlbn >= adp->ad_lbn) + if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); - prevlbn = adp->ad_lbn; - if (adp->ad_lbn < NDADDR && - dp->di_db[adp->ad_lbn] != adp->ad_newblkno) + prevlbn = adp->ad_offset; + if (adp->ad_offset < NDADDR && + dp->di_db[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %d != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - dp->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, + dp->di_db[adp->ad_offset], (intmax_t)adp->ad_newblkno); - if (adp->ad_lbn >= NDADDR && - dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) + if (adp->ad_offset >= NDADDR && + dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno) panic("%s: indirect pointer #%jd mismatch %d != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn - NDADDR, - dp->di_ib[adp->ad_lbn - NDADDR], + (intmax_t)adp->ad_offset - NDADDR, + dp->di_ib[adp->ad_offset - NDADDR], (intmax_t)adp->ad_newblkno); - deplist |= 1 << adp->ad_lbn; + deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); @@ -3981,14 +7863,14 @@ initiate_write_inodeblock_ufs1(inodedep, bp) */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_lbn >= NDADDR) + if (adp->ad_offset >= NDADDR) break; - dp->di_db[adp->ad_lbn] = adp->ad_oldblkno; + dp->di_db[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; - dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NDADDR; i++) { + dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NDADDR; i++) { #ifdef INVARIANTS if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep1"); @@ -4012,8 +7894,8 @@ initiate_write_inodeblock_ufs1(inodedep, bp) * we already checked for fragments in the loop above. */ if (lastadp != NULL && - dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) + dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_db[i] != 0) break; dp->di_size = (i + 1) * fs->fs_bsize; @@ -4030,7 +7912,7 @@ initiate_write_inodeblock_ufs1(inodedep, bp) * postpone fsck, we are stuck with this argument. 
 */
 	for (; adp; adp = TAILQ_NEXT(adp, ad_next))
-		dp->di_ib[adp->ad_lbn - NDADDR] = 0;
+		dp->di_ib[adp->ad_offset - NDADDR] = 0;
 }
 
 /*
@@ -4051,6 +7933,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
 	struct allocdirect *adp, *lastadp;
 	struct ufs2_dinode *dp;
 	struct ufs2_dinode *sip;
+	struct inoref *inoref;
 	struct fs *fs;
 	ufs_lbn_t i;
 #ifdef INVARIANTS
@@ -4064,7 +7947,21 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
 	fs = inodedep->id_fs;
 	dp = (struct ufs2_dinode *)bp->b_data +
 	    ino_to_fsbo(fs, inodedep->id_ino);
+	/*
+	 * If we're on the unlinked list but have not yet written our
+	 * next pointer initialize it here.
+	 */
+	if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) {
+		struct inodedep *inon;
+
+		inon = TAILQ_NEXT(inodedep, id_unlinked);
+		if (inon)
+			dp->di_freelink = inon->id_ino;
+		else
+			dp->di_freelink = 0;
+	}
+
 	/*
 	 * If the bitmap is not yet written, then the allocated
 	 * inode cannot be written to disk.
 	 */
@@ -4079,6 +7976,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
 		*inodedep->id_savedino2 = *dp;
 		bzero((caddr_t)dp, sizeof(struct ufs2_dinode));
 		dp->di_gen = inodedep->id_savedino2->di_gen;
+		dp->di_freelink = inodedep->id_savedino2->di_freelink;
 		return;
 	}
 	/*
@@ -4086,25 +7984,38 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
 	 */
 	inodedep->id_savedsize = dp->di_size;
 	inodedep->id_savedextsize = dp->di_extsize;
+	inodedep->id_savednlink = dp->di_nlink;
 	if (TAILQ_EMPTY(&inodedep->id_inoupdt) &&
-	    TAILQ_EMPTY(&inodedep->id_extupdt))
+	    TAILQ_EMPTY(&inodedep->id_extupdt) &&
+	    TAILQ_EMPTY(&inodedep->id_inoreflst))
 		return;
 	/*
+	 * Revert the link count to that of the first unwritten journal entry.
+	 *
+	 * XXX What if it is canceled? Could entries after it be expired
+	 * before we remove this? Thus leaving us with an incorrect link on
+	 * disk with no journal entries to cover it?
+	 */
+	inoref = TAILQ_FIRST(&inodedep->id_inoreflst);
+	if (inoref)
+		dp->di_nlink = inoref->if_nlink;
+
+	/*
 	 * Set the ext data dependencies to busy.
*/ for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS - if (deplist != 0 && prevlbn >= adp->ad_lbn) + if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); - prevlbn = adp->ad_lbn; - if (dp->di_extb[adp->ad_lbn] != adp->ad_newblkno) + prevlbn = adp->ad_offset; + if (dp->di_extb[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - (intmax_t)dp->di_extb[adp->ad_lbn], + (intmax_t)adp->ad_offset, + (intmax_t)dp->di_extb[adp->ad_offset], (intmax_t)adp->ad_newblkno); - deplist |= 1 << adp->ad_lbn; + deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); @@ -4120,12 +8031,12 @@ initiate_write_inodeblock_ufs2(inodedep, bp) */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno; + dp->di_extb[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; - dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NXADDR; i++) { + dp->di_extsize = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NXADDR; i++) { #ifdef INVARIANTS if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep1"); @@ -4142,8 +8053,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * we already checked for fragments in the loop above. */ if (lastadp != NULL && - dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) + dp->di_extsize <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_extb[i] != 0) break; dp->di_extsize = (i + 1) * fs->fs_bsize; @@ -4154,24 +8065,24 @@ initiate_write_inodeblock_ufs2(inodedep, bp) for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS - if (deplist != 0 && prevlbn >= adp->ad_lbn) + if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); - prevlbn = adp->ad_lbn; - if (adp->ad_lbn < NDADDR && - dp->di_db[adp->ad_lbn] != adp->ad_newblkno) + prevlbn = adp->ad_offset; + if (adp->ad_offset < NDADDR && + dp->di_db[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - (intmax_t)dp->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, + (intmax_t)dp->di_db[adp->ad_offset], (intmax_t)adp->ad_newblkno); - if (adp->ad_lbn >= NDADDR && - dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) + if (adp->ad_offset >= NDADDR && + dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno) panic("%s indirect pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock:", - (intmax_t)adp->ad_lbn - NDADDR, - (intmax_t)dp->di_ib[adp->ad_lbn - NDADDR], + (intmax_t)adp->ad_offset - NDADDR, + (intmax_t)dp->di_ib[adp->ad_offset - NDADDR], (intmax_t)adp->ad_newblkno); - deplist |= 1 << adp->ad_lbn; + deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); @@ -4187,14 +8098,14 @@ initiate_write_inodeblock_ufs2(inodedep, bp) */ for (lastadp = NULL, adp = 
TAILQ_FIRST(&inodedep->id_inoupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_lbn >= NDADDR) + if (adp->ad_offset >= NDADDR) break; - dp->di_db[adp->ad_lbn] = adp->ad_oldblkno; + dp->di_db[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; - dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NDADDR; i++) { + dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NDADDR; i++) { #ifdef INVARIANTS if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep2"); @@ -4218,8 +8129,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * we already checked for fragments in the loop above. */ if (lastadp != NULL && - dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) + dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_db[i] != 0) break; dp->di_size = (i + 1) * fs->fs_bsize; @@ -4236,16 +8147,365 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * postpone fsck, we are stuck with this argument. */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) - dp->di_ib[adp->ad_lbn - NDADDR] = 0; + dp->di_ib[adp->ad_offset - NDADDR] = 0; } /* + * Cancel an indirdep as a result of truncation. Release all of the + * children allocindirs and place their journal work on the appropriate + * list. + */ +static void +cancel_indirdep(indirdep, bp, inodedep, freeblks) + struct indirdep *indirdep; + struct buf *bp; + struct inodedep *inodedep; + struct freeblks *freeblks; +{ + struct allocindir *aip; + + /* + * None of the indirect pointers will ever be visible, + * so they can simply be tossed. GOINGAWAY ensures + * that allocated pointers will be saved in the buffer + * cache until they are freed. Note that they will + * only be able to be found by their physical address + * since the inode mapping the logical address will + * be gone. The save buffer used for the safe copy + * was allocated in setup_allocindir_phase2 using + * the physical address so it could be used for this + * purpose. Hence we swap the safe copy with the real + * copy, allowing the safe copy to be freed and holding + * on to the real copy for later use in indir_trunc. + */ + if (indirdep->ir_state & GOINGAWAY) + panic("cancel_indirdep: already gone"); + if (indirdep->ir_state & ONDEPLIST) { + indirdep->ir_state &= ~ONDEPLIST; + LIST_REMOVE(indirdep, ir_next); + } + indirdep->ir_state |= GOINGAWAY; + VFSTOUFS(indirdep->ir_list.wk_mp)->um_numindirdeps += 1; + while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0) + cancel_allocindir(aip, inodedep, freeblks); + while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) + cancel_allocindir(aip, inodedep, freeblks); + while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0) + cancel_allocindir(aip, inodedep, freeblks); + while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != 0) + cancel_allocindir(aip, inodedep, freeblks); + bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); + WORKLIST_REMOVE(&indirdep->ir_list); + WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, &indirdep->ir_list); + indirdep->ir_savebp = NULL; +} + +/* + * Free an indirdep once it no longer has new pointers to track. 
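+ *
+ * A sketch of the caller-side test implied by the asserts below:
+ *
+ *	if ((indirdep->ir_state & ONDEPLIST) == 0 &&
+ *	    LIST_EMPTY(&indirdep->ir_deplisthd) &&
+ *	    LIST_EMPTY(&indirdep->ir_donehd) &&
+ *	    LIST_EMPTY(&indirdep->ir_writehd) &&
+ *	    LIST_EMPTY(&indirdep->ir_completehd) &&
+ *	    indirdep->ir_savebp == NULL)
+ *		free_indirdep(indirdep);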
+ */ +static void +free_indirdep(indirdep) + struct indirdep *indirdep; +{ + + KASSERT(LIST_EMPTY(&indirdep->ir_jwork), + ("free_indirdep: Journal work not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_completehd), + ("free_indirdep: Complete head not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_writehd), + ("free_indirdep: write head not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_donehd), + ("free_indirdep: done head not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_deplisthd), + ("free_indirdep: deplist head not empty.")); + KASSERT(indirdep->ir_savebp == NULL, + ("free_indirdep: %p ir_savebp != NULL", indirdep)); + KASSERT((indirdep->ir_state & ONDEPLIST) == 0, + ("free_indirdep: %p still on deplist.", indirdep)); + if (indirdep->ir_state & ONWORKLIST) + WORKLIST_REMOVE(&indirdep->ir_list); + WORKITEM_FREE(indirdep, D_INDIRDEP); +} + +/* + * Called before a write to an indirdep. This routine is responsible for + * rolling back pointers to a safe state which includes only those + * allocindirs which have been completed. + */ +static void +initiate_write_indirdep(indirdep, bp) + struct indirdep *indirdep; + struct buf *bp; +{ + + if (indirdep->ir_state & GOINGAWAY) + panic("disk_io_initiation: indirdep gone"); + + /* + * If there are no remaining dependencies, this will be writing + * the real pointers. + */ + if (LIST_EMPTY(&indirdep->ir_deplisthd)) + return; + /* + * Replace up-to-date version with safe version. + */ + FREE_LOCK(&lk); + indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, + M_SOFTDEP_FLAGS); + ACQUIRE_LOCK(&lk); + indirdep->ir_state &= ~ATTACHED; + indirdep->ir_state |= UNDONE; + bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); + bcopy(indirdep->ir_savebp->b_data, bp->b_data, + bp->b_bcount); +} + +/* + * Called when an inode has been cleared in a cg bitmap. This finally + * eliminates any canceled jaddrefs + */ +void +softdep_setup_inofree(mp, bp, ino, wkhd) + struct mount *mp; + struct buf *bp; + ino_t ino; + struct workhead *wkhd; +{ + struct worklist *wk, *wkn; + struct bmsafemap *bmsafemap; + struct inodedep *inodedep; + uint8_t *inosused; + struct cg *cgp; + struct fs *fs; + + ACQUIRE_LOCK(&lk); + fs = VFSTOUFS(mp)->um_fs; + bmsafemap = bmsafemap_lookup(mp, bp, ino_to_cg(fs, ino)); + cgp = (struct cg *)bp->b_data; + inosused = cg_inosused(cgp); + if (isset(inosused, ino % fs->fs_ipg)) + panic("softdep_setup_inofree: inode %d not freed.", ino); + if (inodedep_lookup(mp, ino, 0, &inodedep)) + panic("softdep_setup_inofree: ino %d has existing inodedep %p", + ino, inodedep); + if (wkhd) { /* XXX Temporary. */ + LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) { + if (wk->wk_type != D_JADDREF) + continue; + WORKLIST_REMOVE(wk); + /* + * We can free immediately even if the jaddref isn't attached + * in a background write as now the bitmaps are reconciled. + */ + wk->wk_state |= COMPLETE | ATTACHED; + free_jaddref(WK_JADDREF(wk)); + } + jwork_move(&bp->b_dep, wkhd); + } + FREE_LOCK(&lk); +} + + +/* + * Called via ffs_blkfree() after a set of frags has been cleared from a cg + * map. Any dependencies waiting for the write to clear are added to the + * buf's list and any jnewblks that are being canceled are discarded + * immediately. 
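+ *
+ * The expected call pattern is roughly (a sketch; the caller holds
+ * the locked cg buffer and has already cleared the frags in
+ * cg_blksfree(), and the bdwrite() here is only illustrative):
+ *
+ *	clear the frags in the cg map;
+ *	softdep_setup_blkfree(mp, bp, blkno, frags, wkhd);
+ *	bdwrite(bp);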
+ */ +void +softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) + struct mount *mp; + struct buf *bp; + ufs2_daddr_t blkno; + int frags; + struct workhead *wkhd; +{ + struct bmsafemap *bmsafemap; + struct jnewblk *jnewblk; + struct worklist *wk, *wkn; + struct fs *fs; +#ifdef SUJ_DEBUG + uint8_t *blksfree; + struct cg *cgp; + ufs2_daddr_t jstart; + ufs2_daddr_t jend; + ufs2_daddr_t end; + long bno; + int i; +#endif + + if ((mp->mnt_flag & FS_SUJ) == 0) + return; + ACQUIRE_LOCK(&lk); + fs = VFSTOUFS(mp)->um_fs; + bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno)); + /* + * Detach any jnewblks which have been canceled. They must linger + * until the bitmap is cleared again by ffs_blkfree() to prevent + * an unjournaled allocation from hitting the disk. + */ + if (wkhd) { + LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) { + if (wk->wk_type != D_JNEWBLK) + continue; + jnewblk = WK_JNEWBLK(wk); + KASSERT(jnewblk->jn_state & GOINGAWAY, + ("softdep_setup_blkfree: jnewblk not canceled.")); + WORKLIST_REMOVE(wk); +#ifdef SUJ_DEBUG + /* + * Assert that this block is free in the bitmap + * before we discard the jnewblk. + */ + cgp = (struct cg *)bp->b_data; + blksfree = cg_blksfree(cgp); + bno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; + i < jnewblk->jn_frags; i++) + if (isclr(blksfree, bno + i) == 0) + continue; + panic("softdep_setup_blkfree: not free"); +#endif + /* + * Even if it's not attached we can free immediately + * as the new bitmap is correct. + */ + wk->wk_state |= COMPLETE | ATTACHED; + free_jnewblk(jnewblk); + } + /* + * The buf must be locked by the caller otherwise these could + * be added while it's being written and the write would + * complete them before they made it to disk. + */ + jwork_move(&bp->b_dep, wkhd); + } + +#ifdef SUJ_DEBUG + /* + * Assert that we are not freeing a block which has an outstanding + * allocation dependency. + */ + fs = VFSTOUFS(mp)->um_fs; + end = blkno + frags; + LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { + /* + * Don't match against blocks that will be freed when the + * background write is done. + */ + if ((jnewblk->jn_state & (ATTACHED | COMPLETE | DEPCOMPLETE)) == + (COMPLETE | DEPCOMPLETE)) + continue; + jstart = jnewblk->jn_blkno + jnewblk->jn_oldfrags; + jend = jnewblk->jn_blkno + jnewblk->jn_frags; + if ((blkno >= jstart && blkno < jend) || + (end > jstart && end <= jend)) { + printf("state 0x%X %jd - %d %d dep %p\n", + jnewblk->jn_state, jnewblk->jn_blkno, + jnewblk->jn_oldfrags, jnewblk->jn_frags, + jnewblk->jn_newblk); + panic("softdep_setup_blkfree: " + "%jd-%jd(%d) overlaps with %jd-%jd", + blkno, end, frags, jstart, jend); + } + } +#endif + FREE_LOCK(&lk); +} + +static void +initiate_write_bmsafemap(bmsafemap, bp) + struct bmsafemap *bmsafemap; + struct buf *bp; /* The cg block. */ +{ + struct jaddref *jaddref; + struct jnewblk *jnewblk; + uint8_t *inosused; + uint8_t *blksfree; + struct cg *cgp; + struct fs *fs; + int cleared; + ino_t ino; + long bno; + int i; + + if (bmsafemap->sm_state & IOSTARTED) + panic("initiate_write_bmsafemap: Already started\n"); + bmsafemap->sm_state |= IOSTARTED; + /* + * Clear any inode allocations which are pending journal writes. 
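+	 * The clrbit() performed here is the inverse of the
+	 * setbit() that handle_written_bmsafemap() applies once
+	 * the jaddref has reached the journal, so the on-disk cg
+	 * never claims an inode whose journal record might still
+	 * be lost in a crash.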
+ */ + if (LIST_FIRST(&bmsafemap->sm_jaddrefhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + inosused = cg_inosused(cgp); + LIST_FOREACH(jaddref, &bmsafemap->sm_jaddrefhd, ja_bmdeps) { + ino = jaddref->ja_ino % fs->fs_ipg; + /* + * If this is a background copy the inode may not + * be marked used yet. + */ + if (isset(inosused, ino)) { + if ((jaddref->ja_mode & IFMT) == IFDIR) + cgp->cg_cs.cs_ndir--; + cgp->cg_cs.cs_nifree++; + clrbit(inosused, ino); + jaddref->ja_state &= ~ATTACHED; + jaddref->ja_state |= UNDONE; + } else if ((bp->b_xflags & BX_BKGRDMARKER) == 0) + panic("initiate_write_bmsafemap: inode %d " + "marked free", jaddref->ja_ino); + } + } + /* + * Clear any block allocations which are pending journal writes. + */ + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + blksfree = cg_blksfree(cgp); + LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { + bno = dtogd(fs, jnewblk->jn_blkno); + cleared = 0; + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; + i++) { + if (isclr(blksfree, bno + i)) { + cleared = 1; + setbit(blksfree, bno + i); + } + } + /* + * We may not clear the block if it's a background + * copy. In that case there is no reason to detach + * it. + */ + if (cleared) { + jnewblk->jn_state &= ~ATTACHED; + jnewblk->jn_state |= UNDONE; + } else if ((bp->b_xflags & BX_BKGRDMARKER) == 0) + panic("initiate_write_bmsafemap: block %jd " + "marked free", jnewblk->jn_blkno); + } + } + /* + * Move allocation lists to the written lists so they can be + * cleared once the block write is complete. + */ + LIST_SWAP(&bmsafemap->sm_inodedephd, &bmsafemap->sm_inodedepwr, + inodedep, id_deps); + LIST_SWAP(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr, + newblk, nb_deps); +} + +/* * This routine is called during the completion interrupt * service routine for a disk write (from the procedure called * by the device driver to inform the filesystem caches of * a request completion). It should be called early in this * procedure, before the block is made available to other * processes or other routines are called. + * */ static void softdep_disk_write_complete(bp) @@ -4254,12 +8514,7 @@ softdep_disk_write_complete(bp) struct worklist *wk; struct worklist *owk; struct workhead reattach; - struct newblk *newblk; - struct allocindir *aip; - struct allocdirect *adp; - struct indirdep *indirdep; - struct inodedep *inodedep; - struct bmsafemap *bmsafemap; + struct buf *sbp; /* * If an error occurred while doing the write, then the data @@ -4271,8 +8526,9 @@ softdep_disk_write_complete(bp) /* * This lock must not be released anywhere in this code segment. 
 */
+	sbp = NULL;
+	owk = NULL;
 	ACQUIRE_LOCK(&lk);
-	owk = NULL;
 	while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		if (wk == owk)
@@ -4291,33 +8547,8 @@
 			continue;
 
 		case D_BMSAFEMAP:
-			bmsafemap = WK_BMSAFEMAP(wk);
-			while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd))) {
-				newblk->nb_state |= DEPCOMPLETE;
-				newblk->nb_bmsafemap = NULL;
-				LIST_REMOVE(newblk, nb_deps);
-			}
-			while ((adp =
-			    LIST_FIRST(&bmsafemap->sm_allocdirecthd))) {
-				adp->ad_state |= DEPCOMPLETE;
-				adp->ad_buf = NULL;
-				LIST_REMOVE(adp, ad_deps);
-				handle_allocdirect_partdone(adp);
-			}
-			while ((aip =
-			    LIST_FIRST(&bmsafemap->sm_allocindirhd))) {
-				aip->ai_state |= DEPCOMPLETE;
-				aip->ai_buf = NULL;
-				LIST_REMOVE(aip, ai_deps);
-				handle_allocindir_partdone(aip);
-			}
-			while ((inodedep =
-			    LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) {
-				inodedep->id_state |= DEPCOMPLETE;
-				LIST_REMOVE(inodedep, id_deps);
-				inodedep->id_buf = NULL;
-			}
-			WORKITEM_FREE(bmsafemap, D_BMSAFEMAP);
+			if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp))
+				WORKLIST_INSERT(&reattach, wk);
 			continue;
 
 		case D_MKDIR:
@@ -4325,37 +8556,47 @@
 			continue;
 
 		case D_ALLOCDIRECT:
-			adp = WK_ALLOCDIRECT(wk);
-			adp->ad_state |= COMPLETE;
-			handle_allocdirect_partdone(adp);
+			wk->wk_state |= COMPLETE;
+			handle_allocdirect_partdone(WK_ALLOCDIRECT(wk), NULL);
 			continue;
 
 		case D_ALLOCINDIR:
-			aip = WK_ALLOCINDIR(wk);
-			aip->ai_state |= COMPLETE;
-			handle_allocindir_partdone(aip);
+			wk->wk_state |= COMPLETE;
+			handle_allocindir_partdone(WK_ALLOCINDIR(wk));
 			continue;
 
 		case D_INDIRDEP:
-			indirdep = WK_INDIRDEP(wk);
-			if (indirdep->ir_state & GOINGAWAY)
-				panic("disk_write_complete: indirdep gone");
-			bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
-			free(indirdep->ir_saveddata, M_INDIRDEP);
-			indirdep->ir_saveddata = 0;
-			indirdep->ir_state &= ~UNDONE;
-			indirdep->ir_state |= ATTACHED;
-			while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) {
-				handle_allocindir_partdone(aip);
-				if (aip == LIST_FIRST(&indirdep->ir_donehd))
-					panic("disk_write_complete: not gone");
-			}
-			WORKLIST_INSERT(&reattach, wk);
-			if ((bp->b_flags & B_DELWRI) == 0)
-				stat_indir_blk_ptrs++;
-			bdirty(bp);
+			if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp))
+				WORKLIST_INSERT(&reattach, wk);
 			continue;
 
+		case D_FREEBLKS:
+			wk->wk_state |= COMPLETE;
+			if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE)
+				add_to_worklist(wk, 1);
+			continue;
+
+		case D_FREEWORK:
+			handle_written_freework(WK_FREEWORK(wk));
+			break;
+
+		case D_FREEDEP:
+			free_freedep(WK_FREEDEP(wk));
+			continue;
+
+		case D_JSEGDEP:
+			free_jsegdep(WK_JSEGDEP(wk));
+			continue;
+
+		case D_JSEG:
+			handle_written_jseg(WK_JSEG(wk), bp);
+			continue;
+
+		case D_SBDEP:
+			if (handle_written_sbdep(WK_SBDEP(wk), bp))
+				WORKLIST_INSERT(&reattach, wk);
+			continue;
+
 		default:
 			panic("handle_disk_write_complete: Unknown type %s",
 			    TYPENAME(wk->wk_type));
@@ -4370,6 +8611,8 @@
 		WORKLIST_INSERT(&bp->b_dep, wk);
 	}
 	FREE_LOCK(&lk);
+	if (sbp)
+		brelse(sbp);
 }
 
 /*
@@ -4378,18 +8621,17 @@
 * splbio interrupts blocked.
 */
static void
-handle_allocdirect_partdone(adp)
+handle_allocdirect_partdone(adp, wkhd)
 	struct allocdirect *adp;	/* the completed allocdirect */
+	struct workhead *wkhd;		/* Work to do when inode is written. */
{
 	struct allocdirectlst *listhead;
 	struct allocdirect *listadp;
 	struct inodedep *inodedep;
-	long bsize, delay;
+	long bsize;
 
 	if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
 		return;
-	if (adp->ad_buf != NULL)
-		panic("handle_allocdirect_partdone: dangling dep");
 	/*
 	 * The on-disk inode cannot claim to be any larger than the last
 	 * fragment that has been written. Otherwise, the on-disk inode
@@ -4439,25 +8681,27 @@ static void
 		return;
 	}
 	/*
-	 * If we have found the just finished dependency, then free
+	 * If we have found the just finished dependency, then queue
 	 * it along with anything that follows it that is complete.
-	 * If the inode still has a bitmap dependency, then it has
-	 * never been written to disk, hence the on-disk inode cannot
-	 * reference the old fragment so we can free it without delay.
+	 * Since the pointer has not yet been written in the inode
+	 * as the dependency prevents it, place the allocdirect on the
+	 * bufwait list where it will be freed once the pointer is
+	 * valid.
 	 */
-	delay = (inodedep->id_state & DEPCOMPLETE);
+	if (wkhd == NULL)
+		wkhd = &inodedep->id_bufwait;
 	for (; adp; adp = listadp) {
 		listadp = TAILQ_NEXT(adp, ad_next);
 		if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
 			return;
-		free_allocdirect(listhead, adp, delay);
+		TAILQ_REMOVE(listhead, adp, ad_next);
+		WORKLIST_INSERT(wkhd, &adp->ad_block.nb_list);
 	}
 }
 
 /*
- * Called from within softdep_disk_write_complete above. Note that
- * this routine is always called from interrupt level with further
- * splbio interrupts blocked.
+ * Called from within softdep_disk_write_complete above. This routine
+ * completes successfully written allocindirs.
 */
static void
handle_allocindir_partdone(aip)
@@ -4467,11 +8711,9 @@ handle_allocindir_partdone(aip)
 
 	if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE)
 		return;
-	if (aip->ai_buf != NULL)
-		panic("handle_allocindir_partdone: dangling dependency");
 	indirdep = aip->ai_indirdep;
+	LIST_REMOVE(aip, ai_next);
 	if (indirdep->ir_state & UNDONE) {
-		LIST_REMOVE(aip, ai_next);
 		LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next);
 		return;
 	}
@@ -4481,13 +8723,130 @@ handle_allocindir_partdone(aip)
 	else
 		((ufs2_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
 		    aip->ai_newblkno;
-	LIST_REMOVE(aip, ai_next);
-	if (aip->ai_freefrag != NULL)
-		add_to_worklist(&aip->ai_freefrag->ff_list);
-	WORKITEM_FREE(aip, D_ALLOCINDIR);
+	/*
+	 * Await the pointer write before freeing the allocindir.
+	 */
+	LIST_INSERT_HEAD(&indirdep->ir_writehd, aip, ai_next);
}
 
/*
+ * Release segments held on a jwork list.
+ */
+static void
+handle_jwork(wkhd)
+	struct workhead *wkhd;
+{
+	struct worklist *wk;
+
+	while ((wk = LIST_FIRST(wkhd)) != NULL) {
+		WORKLIST_REMOVE(wk);
+		switch (wk->wk_type) {
+		case D_JSEGDEP:
+			free_jsegdep(WK_JSEGDEP(wk));
+			continue;
+		default:
+			panic("handle_jwork: Unknown type %s\n",
+			    TYPENAME(wk->wk_type));
+		}
+	}
+}
+
+/*
+ * Handle the bufwait list on an inode when it is safe to release items
+ * held there. This normally happens after an inode block is written but
+ * may be delayed and handled later if there are pending journal items that
+ * are not yet safe to be released.
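+ *
+ * Expected usage, roughly (for a removed file the journal
+ * references are collected on refhd instead of freed):
+ *
+ *	freefile = handle_bufwait(inodedep, refhd);
+ *	if (freefile != NULL)
+ *		add_to_worklist(&freefile->fx_list, 0);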
+ */ +static struct freefile * +handle_bufwait(inodedep, refhd) + struct inodedep *inodedep; + struct workhead *refhd; +{ + struct jaddref *jaddref; + struct freefile *freefile; + struct worklist *wk; + + freefile = NULL; + while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { + WORKLIST_REMOVE(wk); + switch (wk->wk_type) { + case D_FREEFILE: + /* + * We defer adding freefile to the worklist + * until all other additions have been made to + * ensure that it will be done after all the + * old blocks have been freed. + */ + if (freefile != NULL) + panic("handle_bufwait: freefile"); + freefile = WK_FREEFILE(wk); + continue; + + case D_MKDIR: + handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT); + continue; + + case D_DIRADD: + diradd_inode_written(WK_DIRADD(wk), inodedep); + continue; + + case D_FREEFRAG: + wk->wk_state |= COMPLETE; + if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(wk, 0); + continue; + + case D_DIRREM: + wk->wk_state |= COMPLETE; + add_to_worklist(wk, 0); + continue; + + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + free_newblk(WK_NEWBLK(wk)); + continue; + + case D_JNEWBLK: + wk->wk_state |= COMPLETE; + free_jnewblk(WK_JNEWBLK(wk)); + continue; + + /* + * Save freed journal segments and add references on + * the supplied list which will delay their release + * until the cg bitmap is cleared on disk. + */ + case D_JSEGDEP: + if (refhd == NULL) + free_jsegdep(WK_JSEGDEP(wk)); + else + WORKLIST_INSERT(refhd, wk); + continue; + + case D_JADDREF: + jaddref = WK_JADDREF(wk); + TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, + if_deps); + /* + * Transfer any jaddrefs to the list to be freed with + * the bitmap if we're handling a removed file. + */ + if (refhd == NULL) { + wk->wk_state |= COMPLETE; + free_jaddref(jaddref); + } else + WORKLIST_INSERT(refhd, wk); + continue; + + default: + panic("handle_bufwait: Unknown type %p(%s)", + wk, TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } + return (freefile); +} +/* * Called from within softdep_disk_write_complete above to restore * in-memory inode block contents to their most up-to-date state. Note * that this routine is always called from interrupt level with further @@ -4498,12 +8857,17 @@ handle_written_inodeblock(inodedep, bp) struct inodedep *inodedep; struct buf *bp; /* buffer containing the inode block */ { - struct worklist *wk, *filefree; + struct freefile *freefile; struct allocdirect *adp, *nextadp; struct ufs1_dinode *dp1 = NULL; struct ufs2_dinode *dp2 = NULL; + struct workhead wkhd; int hadchanges, fstype; + ino_t freelink; + LIST_INIT(&wkhd); + hadchanges = 0; + freefile = NULL; if ((inodedep->id_state & IOSTARTED) == 0) panic("handle_written_inodeblock: not started"); inodedep->id_state &= ~IOSTARTED; @@ -4511,12 +8875,30 @@ handle_written_inodeblock(inodedep, bp) fstype = UFS1; dp1 = (struct ufs1_dinode *)bp->b_data + ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); + freelink = dp1->di_freelink; } else { fstype = UFS2; dp2 = (struct ufs2_dinode *)bp->b_data + ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); + freelink = dp2->di_freelink; } /* + * If we wrote a freelink pointer during the last write record it + * here. If we did not, keep the buffer dirty until we do. 
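+	 * The acceptance test below is, in effect:
+	 *
+	 *	inon = TAILQ_NEXT(inodedep, id_unlinked);
+	 *	freelink == (inon ? inon->id_ino : 0)
+	 *
+	 * and only a matching pointer may set UNLINKNEXT.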
+ */ + if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) { + struct inodedep *inon; + + inon = TAILQ_NEXT(inodedep, id_unlinked); + if ((inon == NULL && freelink == 0) || + (inon && inon->id_ino == freelink)) { + if (inon) + inon->id_state |= UNLINKPREV; + inodedep->id_state |= UNLINKNEXT; + } else + hadchanges = 1; + } + /* * If we had to rollback the inode allocation because of * bitmaps being incomplete, then simply restore it. * Keep the block dirty so that it will not be reclaimed until @@ -4524,6 +8906,7 @@ handle_written_inodeblock(inodedep, bp) * corresponding updates written to disk. */ if (inodedep->id_savedino1 != NULL) { + hadchanges = 1; if (fstype == UFS1) *dp1 = *inodedep->id_savedino1; else @@ -4533,6 +8916,13 @@ handle_written_inodeblock(inodedep, bp) if ((bp->b_flags & B_DELWRI) == 0) stat_inode_bitmap++; bdirty(bp); + /* + * If the inode is clear here and GOINGAWAY it will never + * be written. Process the bufwait and clear any pending + * work which may include the freefile. + */ + if (inodedep->id_state & GOINGAWAY) + goto bufwait; return (1); } inodedep->id_state |= COMPLETE; @@ -4540,50 +8930,49 @@ handle_written_inodeblock(inodedep, bp) * Roll forward anything that had to be rolled back before * the inode could be updated. */ - hadchanges = 0; for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) { nextadp = TAILQ_NEXT(adp, ad_next); if (adp->ad_state & ATTACHED) panic("handle_written_inodeblock: new entry"); if (fstype == UFS1) { - if (adp->ad_lbn < NDADDR) { - if (dp1->di_db[adp->ad_lbn]!=adp->ad_oldblkno) + if (adp->ad_offset < NDADDR) { + if (dp1->di_db[adp->ad_offset]!=adp->ad_oldblkno) panic("%s %s #%jd mismatch %d != %jd", "handle_written_inodeblock:", "direct pointer", - (intmax_t)adp->ad_lbn, - dp1->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, + dp1->di_db[adp->ad_offset], (intmax_t)adp->ad_oldblkno); - dp1->di_db[adp->ad_lbn] = adp->ad_newblkno; + dp1->di_db[adp->ad_offset] = adp->ad_newblkno; } else { - if (dp1->di_ib[adp->ad_lbn - NDADDR] != 0) + if (dp1->di_ib[adp->ad_offset - NDADDR] != 0) panic("%s: %s #%jd allocated as %d", "handle_written_inodeblock", "indirect pointer", - (intmax_t)adp->ad_lbn - NDADDR, - dp1->di_ib[adp->ad_lbn - NDADDR]); - dp1->di_ib[adp->ad_lbn - NDADDR] = + (intmax_t)adp->ad_offset - NDADDR, + dp1->di_ib[adp->ad_offset - NDADDR]); + dp1->di_ib[adp->ad_offset - NDADDR] = adp->ad_newblkno; } } else { - if (adp->ad_lbn < NDADDR) { - if (dp2->di_db[adp->ad_lbn]!=adp->ad_oldblkno) + if (adp->ad_offset < NDADDR) { + if (dp2->di_db[adp->ad_offset]!=adp->ad_oldblkno) panic("%s: %s #%jd %s %jd != %jd", "handle_written_inodeblock", "direct pointer", - (intmax_t)adp->ad_lbn, "mismatch", - (intmax_t)dp2->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, "mismatch", + (intmax_t)dp2->di_db[adp->ad_offset], (intmax_t)adp->ad_oldblkno); - dp2->di_db[adp->ad_lbn] = adp->ad_newblkno; + dp2->di_db[adp->ad_offset] = adp->ad_newblkno; } else { - if (dp2->di_ib[adp->ad_lbn - NDADDR] != 0) + if (dp2->di_ib[adp->ad_offset - NDADDR] != 0) panic("%s: %s #%jd allocated as %jd", "handle_written_inodeblock", "indirect pointer", - (intmax_t)adp->ad_lbn - NDADDR, + (intmax_t)adp->ad_offset - NDADDR, (intmax_t) - dp2->di_ib[adp->ad_lbn - NDADDR]); - dp2->di_ib[adp->ad_lbn - NDADDR] = + dp2->di_ib[adp->ad_offset - NDADDR]); + dp2->di_ib[adp->ad_offset - NDADDR] = adp->ad_newblkno; } } @@ -4595,13 +8984,13 @@ handle_written_inodeblock(inodedep, bp) nextadp = TAILQ_NEXT(adp, ad_next); if (adp->ad_state & ATTACHED) 
panic("handle_written_inodeblock: new entry"); - if (dp2->di_extb[adp->ad_lbn] != adp->ad_oldblkno) + if (dp2->di_extb[adp->ad_offset] != adp->ad_oldblkno) panic("%s: direct pointers #%jd %s %jd != %jd", "handle_written_inodeblock", - (intmax_t)adp->ad_lbn, "mismatch", - (intmax_t)dp2->di_extb[adp->ad_lbn], + (intmax_t)adp->ad_offset, "mismatch", + (intmax_t)dp2->di_extb[adp->ad_offset], (intmax_t)adp->ad_oldblkno); - dp2->di_extb[adp->ad_lbn] = adp->ad_newblkno; + dp2->di_extb[adp->ad_offset] = adp->ad_newblkno; adp->ad_state &= ~UNDONE; adp->ad_state |= ATTACHED; hadchanges = 1; @@ -4613,12 +9002,23 @@ handle_written_inodeblock(inodedep, bp) */ if (inodedep->id_savedsize == -1 || inodedep->id_savedextsize == -1) panic("handle_written_inodeblock: bad size"); + if (inodedep->id_savednlink > LINK_MAX) + panic("handle_written_inodeblock: Invalid link count " + "%d for inodedep %p", inodedep->id_savednlink, inodedep); if (fstype == UFS1) { + if (dp1->di_nlink != inodedep->id_savednlink) { + dp1->di_nlink = inodedep->id_savednlink; + hadchanges = 1; + } if (dp1->di_size != inodedep->id_savedsize) { dp1->di_size = inodedep->id_savedsize; hadchanges = 1; } } else { + if (dp2->di_nlink != inodedep->id_savednlink) { + dp2->di_nlink = inodedep->id_savednlink; + hadchanges = 1; + } if (dp2->di_size != inodedep->id_savedsize) { dp2->di_size = inodedep->id_savedsize; hadchanges = 1; @@ -4630,6 +9030,7 @@ handle_written_inodeblock(inodedep, bp) } inodedep->id_savedsize = -1; inodedep->id_savedextsize = -1; + inodedep->id_savednlink = -1; /* * If there were any rollbacks in the inode block, then it must be * marked dirty so that its will eventually get written back in @@ -4637,69 +9038,49 @@ handle_written_inodeblock(inodedep, bp) */ if (hadchanges) bdirty(bp); +bufwait: /* * Process any allocdirects that completed during the update. */ if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL) - handle_allocdirect_partdone(adp); + handle_allocdirect_partdone(adp, &wkhd); if ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL) - handle_allocdirect_partdone(adp); + handle_allocdirect_partdone(adp, &wkhd); /* * Process deallocations that were held pending until the * inode had been written to disk. Freeing of the inode * is delayed until after all blocks have been freed to * avoid creation of new triples - * before the old ones have been deleted. + * before the old ones have been deleted. Completely + * unlinked inodes are not processed until the unlinked + * inode list is written or the last reference is removed. */ - filefree = NULL; - while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { - WORKLIST_REMOVE(wk); - switch (wk->wk_type) { - - case D_FREEFILE: - /* - * We defer adding filefree to the worklist until - * all other additions have been made to ensure - * that it will be done after all the old blocks - * have been freed. 
-			 */
-			if (filefree != NULL)
-				panic("handle_written_inodeblock: filefree");
-			filefree = wk;
-			continue;
-
-		case D_MKDIR:
-			handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT);
-			continue;
-
-		case D_DIRADD:
-			diradd_inode_written(WK_DIRADD(wk), inodedep);
-			continue;
-
-		case D_FREEBLKS:
-			wk->wk_state |= COMPLETE;
-			if ((wk->wk_state & ALLCOMPLETE) != ALLCOMPLETE)
-				continue;
-			/* -- fall through -- */
-		case D_FREEFRAG:
-		case D_DIRREM:
-			add_to_worklist(wk);
-			continue;
-
-		case D_NEWDIRBLK:
-			free_newdirblk(WK_NEWDIRBLK(wk));
-			continue;
-
-		default:
-			panic("handle_written_inodeblock: Unknown type %s",
-			    TYPENAME(wk->wk_type));
-			/* NOTREACHED */
+	if ((inodedep->id_state & UNLINKED) == 0) {
+		freefile = handle_bufwait(inodedep, NULL);
+		if (freefile && !LIST_EMPTY(&wkhd)) {
+			WORKLIST_INSERT(&wkhd, &freefile->fx_list);
+			freefile = NULL;
 		}
 	}
-	if (filefree != NULL) {
+	/*
+	 * Move rolled forward dependency completions to the bufwait list
+	 * now that those that were already written have been processed.
+	 */
+	if (!LIST_EMPTY(&wkhd) && hadchanges == 0)
+		panic("handle_written_inodeblock: bufwait but no changes");
+	jwork_move(&inodedep->id_bufwait, &wkhd);
+
+	if (freefile != NULL) {
+		/*
+		 * If the inode is goingaway it was never written. Fake up
+		 * the state here so free_inodedep() can succeed.
+		 */
+		if (inodedep->id_state & GOINGAWAY)
+			inodedep->id_state |= COMPLETE | DEPCOMPLETE;
 		if (free_inodedep(inodedep) == 0)
-			panic("handle_written_inodeblock: live inodedep");
-		add_to_worklist(filefree);
+			panic("handle_written_inodeblock: live inodedep %p",
+			    inodedep);
+		add_to_worklist(&freefile->fx_list, 0);
 		return (0);
 	}
 
@@ -4707,12 +9088,101 @@
 	 * If no outstanding dependencies, free it.
 	 */
 	if (free_inodedep(inodedep) ||
-	    (TAILQ_FIRST(&inodedep->id_inoupdt) == 0 &&
-	     TAILQ_FIRST(&inodedep->id_extupdt) == 0))
+	    (TAILQ_FIRST(&inodedep->id_inoreflst) == 0 &&
+	     TAILQ_FIRST(&inodedep->id_inoupdt) == 0 &&
+	     TAILQ_FIRST(&inodedep->id_extupdt) == 0 &&
+	     LIST_FIRST(&inodedep->id_bufwait) == 0))
 		return (0);
 	return (hadchanges);
}
 
+static int
+handle_written_indirdep(indirdep, bp, bpp)
+	struct indirdep *indirdep;
+	struct buf *bp;
+	struct buf **bpp;
+{
+	struct allocindir *aip;
+	int chgs;
+
+	if (indirdep->ir_state & GOINGAWAY)
+		panic("handle_written_indirdep: indirdep gone");
+	chgs = 0;
+	/*
+	 * If there were rollbacks revert them here.
+	 */
+	if (indirdep->ir_saveddata) {
+		bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
+		free(indirdep->ir_saveddata, M_INDIRDEP);
+		indirdep->ir_saveddata = 0;
+		chgs = 1;
+	}
+	indirdep->ir_state &= ~UNDONE;
+	indirdep->ir_state |= ATTACHED;
+	/*
+	 * Move allocindirs with written pointers to the completehd if
+	 * the indirdep's pointer is not yet written. Otherwise
+	 * free them here.
+	 */
+	while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0) {
+		LIST_REMOVE(aip, ai_next);
+		if ((indirdep->ir_state & DEPCOMPLETE) == 0) {
+			LIST_INSERT_HEAD(&indirdep->ir_completehd, aip,
+			    ai_next);
+			continue;
+		}
+		free_newblk(&aip->ai_block);
+	}
+	/*
+	 * Move allocindirs that have finished dependency processing from
+	 * the done list to the write list after updating the pointers.
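+	 * The allocindir lists form a pipeline, roughly:
+	 *
+	 *	ir_deplisthd -> ir_donehd -> ir_writehd ->
+	 *	    ir_completehd -> free_newblk()
+	 *
+	 * advancing one stage per write of the indirect block.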
+	 */
+	while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) {
+		handle_allocindir_partdone(aip);
+		if (aip == LIST_FIRST(&indirdep->ir_donehd))
+			panic("handle_written_indirdep: not gone");
+		chgs = 1;
+	}
+	/*
+	 * If this indirdep has been detached from its newblk during
+	 * I/O we need to keep this dep attached to the buffer so
+	 * deallocate_dependencies can find it and properly resolve
+	 * any outstanding dependencies.
+	 */
+	if ((indirdep->ir_state & (ONDEPLIST | DEPCOMPLETE)) == 0)
+		chgs = 1;
+	if ((bp->b_flags & B_DELWRI) == 0)
+		stat_indir_blk_ptrs++;
+	/*
+	 * If there were no changes we can discard the savedbp and detach
+	 * ourselves from the buf. We are only carrying completed pointers
+	 * in this case.
+	 */
+	if (chgs == 0) {
+		struct buf *sbp;
+
+		sbp = indirdep->ir_savebp;
+		sbp->b_flags |= B_INVAL | B_NOCACHE;
+		indirdep->ir_savebp = NULL;
+		if (*bpp != NULL)
+			panic("handle_written_indirdep: bp already exists.");
+		*bpp = sbp;
+	} else
+		bdirty(bp);
+	/*
+	 * If there are no fresh dependencies and none waiting on writes
+	 * we can free the indirdep.
+	 */
+	if ((indirdep->ir_state & DEPCOMPLETE) && chgs == 0) {
+		if (indirdep->ir_state & ONDEPLIST)
+			LIST_REMOVE(indirdep, ir_next);
+		free_indirdep(indirdep);
+		return (0);
+	}
+
+	return (chgs);
+}
+
 /*
 * Process a diradd entry after its dependent inode has been written.
 * This routine must be called with splbio interrupts blocked.
@@ -4722,50 +9192,200 @@ diradd_inode_written(dap, inodedep)
 	struct diradd *dap;
 	struct inodedep *inodedep;
{
-	struct pagedep *pagedep;
 
 	dap->da_state |= COMPLETE;
-	if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
-		if (dap->da_state & DIRCHG)
-			pagedep = dap->da_previous->dm_pagedep;
-		else
-			pagedep = dap->da_pagedep;
-		LIST_REMOVE(dap, da_pdlist);
-		LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
-	}
+	complete_diradd(dap);
 	WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
}
 
/*
- * Handle the completion of a mkdir dependency.
+ * Returns true if the bmsafemap will have rollbacks when written. Must
+ * only be called with lk and the buf lock on the cg held.
 */
+static int
+bmsafemap_rollbacks(bmsafemap)
+	struct bmsafemap *bmsafemap;
+{
+
+	return (!LIST_EMPTY(&bmsafemap->sm_jaddrefhd) |
+	    !LIST_EMPTY(&bmsafemap->sm_jnewblkhd));
+}
+
+/*
+ * Complete a write to a bmsafemap structure. Roll forward any bitmap
+ * changes if it's not a background write. Set all written dependencies
+ * to DEPCOMPLETE and free the structure if possible.
+ */
+static int
+handle_written_bmsafemap(bmsafemap, bp)
+	struct bmsafemap *bmsafemap;
+	struct buf *bp;
+{
+	struct newblk *newblk;
+	struct inodedep *inodedep;
+	struct jaddref *jaddref, *jatmp;
+	struct jnewblk *jnewblk, *jntmp;
+	uint8_t *inosused;
+	uint8_t *blksfree;
+	struct cg *cgp;
+	struct fs *fs;
+	ino_t ino;
+	long bno;
+	int chgs;
+	int i;
+
+	if ((bmsafemap->sm_state & IOSTARTED) == 0)
+		panic("handle_written_bmsafemap: Not started\n");
+	chgs = 0;
+	bmsafemap->sm_state &= ~IOSTARTED;
+	/*
+	 * Restore unwritten inode allocation pending jaddref writes.
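+	 * This is the roll forward matching the rollback in
+	 * initiate_write_bmsafemap(), in effect:
+	 *
+	 *	setbit(inosused, ino);
+	 *	jaddref->ja_state |= ATTACHED;
+	 *	free_jaddref(jaddref);
+	 *
+	 * with chgs set so the buffer is redirtied and the restored
+	 * bits reach the disk.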
+ */ + if (!LIST_EMPTY(&bmsafemap->sm_jaddrefhd)) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + inosused = cg_inosused(cgp); + LIST_FOREACH_SAFE(jaddref, &bmsafemap->sm_jaddrefhd, + ja_bmdeps, jatmp) { + if ((jaddref->ja_state & UNDONE) == 0) + continue; + ino = jaddref->ja_ino % fs->fs_ipg; + if (isset(inosused, ino)) + panic("handle_written_bmsafemap: " + "re-allocated inode"); + if ((bp->b_xflags & BX_BKGRDMARKER) == 0) { + if ((jaddref->ja_mode & IFMT) == IFDIR) + cgp->cg_cs.cs_ndir++; + cgp->cg_cs.cs_nifree--; + setbit(inosused, ino); + chgs = 1; + } + jaddref->ja_state &= ~UNDONE; + jaddref->ja_state |= ATTACHED; + free_jaddref(jaddref); + } + } + /* + * Restore any block allocations which are pending journal writes. + */ + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + blksfree = cg_blksfree(cgp); + LIST_FOREACH_SAFE(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps, + jntmp) { + if ((jnewblk->jn_state & UNDONE) == 0) + continue; + bno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; + i++) { + if (bp->b_xflags & BX_BKGRDMARKER) + break; + if ((jnewblk->jn_state & NEWBLOCK) == 0 && + isclr(blksfree, bno + i)) + panic("handle_written_bmsafemap: " + "re-allocated fragment"); + clrbit(blksfree, bno + i); + chgs = 1; + } + jnewblk->jn_state &= ~(UNDONE | NEWBLOCK); + jnewblk->jn_state |= ATTACHED; + free_jnewblk(jnewblk); + } + } + while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkwr))) { + newblk->nb_state |= DEPCOMPLETE; + newblk->nb_state &= ~ONDEPLIST; + newblk->nb_bmsafemap = NULL; + LIST_REMOVE(newblk, nb_deps); + if (newblk->nb_list.wk_type == D_ALLOCDIRECT) + handle_allocdirect_partdone( + WK_ALLOCDIRECT(&newblk->nb_list), NULL); + else if (newblk->nb_list.wk_type == D_ALLOCINDIR) + handle_allocindir_partdone( + WK_ALLOCINDIR(&newblk->nb_list)); + else if (newblk->nb_list.wk_type != D_NEWBLK) + panic("handle_written_bmsafemap: Unexpected type: %s", + TYPENAME(newblk->nb_list.wk_type)); + } + while ((inodedep = LIST_FIRST(&bmsafemap->sm_inodedepwr)) != NULL) { + inodedep->id_state |= DEPCOMPLETE; + inodedep->id_state &= ~ONDEPLIST; + LIST_REMOVE(inodedep, id_deps); + inodedep->id_bmsafemap = NULL; + } + if (LIST_EMPTY(&bmsafemap->sm_jaddrefhd) && + LIST_EMPTY(&bmsafemap->sm_jnewblkhd) && + LIST_EMPTY(&bmsafemap->sm_newblkhd) && + LIST_EMPTY(&bmsafemap->sm_inodedephd)) { + if (chgs) + bdirty(bp); + LIST_REMOVE(bmsafemap, sm_hash); + WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + return (0); + } + bdirty(bp); + return (1); +} + +/* + * Try to free a mkdir dependency. 
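+ *
+ * Both halves must be complete first, i.e. (sketch):
+ *
+ *	(mkdir->md_state & ALLCOMPLETE) == ALLCOMPLETE
+ *
+ * where MKDIR_PARENT tracks the parent inode write and
+ * MKDIR_BODY the write of the new directory block.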
+ */ static void -handle_written_mkdir(mkdir, type) +complete_mkdir(mkdir) struct mkdir *mkdir; - int type; { struct diradd *dap; - struct pagedep *pagedep; - if (mkdir->md_state != type) - panic("handle_written_mkdir: bad type"); + if ((mkdir->md_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + LIST_REMOVE(mkdir, md_mkdirs); dap = mkdir->md_diradd; - dap->da_state &= ~type; - if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) + dap->da_state &= ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)); + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) { dap->da_state |= DEPCOMPLETE; - if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { - if (dap->da_state & DIRCHG) - pagedep = dap->da_previous->dm_pagedep; - else - pagedep = dap->da_pagedep; - LIST_REMOVE(dap, da_pdlist); - LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); + complete_diradd(dap); } - LIST_REMOVE(mkdir, md_mkdirs); WORKITEM_FREE(mkdir, D_MKDIR); } /* + * Handle the completion of a mkdir dependency. + */ +static void +handle_written_mkdir(mkdir, type) + struct mkdir *mkdir; + int type; +{ + + if ((mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)) != type) + panic("handle_written_mkdir: bad type"); + mkdir->md_state |= COMPLETE; + complete_mkdir(mkdir); +} + +static void +free_pagedep(pagedep) + struct pagedep *pagedep; +{ + int i; + + if (pagedep->pd_state & (NEWBLOCK | ONWORKLIST)) + return; + for (i = 0; i < DAHASHSZ; i++) + if (!LIST_EMPTY(&pagedep->pd_diraddhd[i])) + return; + if (!LIST_EMPTY(&pagedep->pd_jmvrefhd)) + return; + if (!LIST_EMPTY(&pagedep->pd_dirremhd)) + return; + if (!LIST_EMPTY(&pagedep->pd_pendinghd)) + return; + LIST_REMOVE(pagedep, pd_hash); + WORKITEM_FREE(pagedep, D_PAGEDEP); +} + +/* * Called from within softdep_disk_write_complete above. * A write operation was just completed. Removed inodes can * now be freed and associated block pointers may be committed. @@ -4790,8 +9410,11 @@ handle_written_filepage(pagedep, bp) */ while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) { LIST_REMOVE(dirrem, dm_next); + dirrem->dm_state |= COMPLETE; dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd), + ("handle_written_filepage: Journal entries not written.")); + add_to_worklist(&dirrem->dm_list, 0); } /* * Free any directory additions that have been committed. @@ -4800,7 +9423,7 @@ handle_written_filepage(pagedep, bp) */ if ((pagedep->pd_state & NEWBLOCK) == 0) while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) - free_diradd(dap); + free_diradd(dap, NULL); /* * Uncommitted directory entries must be restored. */ @@ -4845,7 +9468,8 @@ handle_written_filepage(pagedep, bp) * Otherwise it will remain to track any new entries on * the page in case they are fsync'ed. 
*/ - if ((pagedep->pd_state & NEWBLOCK) == 0) { + if ((pagedep->pd_state & NEWBLOCK) == 0 && + LIST_EMPTY(&pagedep->pd_jmvrefhd)) { LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); } @@ -4880,8 +9504,8 @@ softdep_load_inodeblock(ip) */ ip->i_effnlink = ip->i_nlink; ACQUIRE_LOCK(&lk); - if (inodedep_lookup(UFSTOVFS(ip->i_ump), - ip->i_number, 0, &inodedep) == 0) { + if (inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, + &inodedep) == 0) { FREE_LOCK(&lk); return; } @@ -4908,6 +9532,7 @@ softdep_update_inodeblock(ip, bp, waitfor) int waitfor; /* nonzero => update must be allowed */ { struct inodedep *inodedep; + struct inoref *inoref; struct worklist *wk; struct mount *mp; struct buf *ibp; @@ -4922,6 +9547,7 @@ softdep_update_inodeblock(ip, bp, waitfor) */ mp = UFSTOVFS(ip->i_ump); ACQUIRE_LOCK(&lk); +again: if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) { FREE_LOCK(&lk); if (ip->i_effnlink != ip->i_nlink) @@ -4931,6 +9557,19 @@ softdep_update_inodeblock(ip, bp, waitfor) if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink) panic("softdep_update_inodeblock: bad delta"); /* + * If we're flushing all dependencies we must also move any waiting + * for journal writes onto the bufwait list prior to I/O. + */ + if (waitfor) { + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) + == DEPCOMPLETE) { + jwait(&inoref->if_list); + goto again; + } + } + } + /* * Changes have been initiated. Anything depending on these * changes cannot occur until this inode has been written. */ @@ -4945,10 +9584,12 @@ softdep_update_inodeblock(ip, bp, waitfor) */ merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt); if (!TAILQ_EMPTY(&inodedep->id_inoupdt)) - handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt)); + handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt), + NULL); merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt); if (!TAILQ_EMPTY(&inodedep->id_extupdt)) - handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt)); + handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt), + NULL); /* * Now that the inode has been pushed into the buffer, the * operations dependent on the inode being written to disk @@ -4971,11 +9612,11 @@ softdep_update_inodeblock(ip, bp, waitfor) return; } retry: - if ((inodedep->id_state & DEPCOMPLETE) != 0) { + if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) != 0) { FREE_LOCK(&lk); return; } - ibp = inodedep->id_buf; + ibp = inodedep->id_bmsafemap->sm_buf; ibp = getdirtybuf(ibp, &lk, MNT_WAIT); if (ibp == NULL) { /* @@ -5007,13 +9648,13 @@ merge_inode_lists(newlisthead, oldlisthead) newadp = TAILQ_FIRST(newlisthead); for (listadp = TAILQ_FIRST(oldlisthead); listadp && newadp;) { - if (listadp->ad_lbn < newadp->ad_lbn) { + if (listadp->ad_offset < newadp->ad_offset) { listadp = TAILQ_NEXT(listadp, ad_next); continue; } TAILQ_REMOVE(newlisthead, newadp, ad_next); TAILQ_INSERT_BEFORE(listadp, newadp, ad_next); - if (listadp->ad_lbn == newadp->ad_lbn) { + if (listadp->ad_offset == newadp->ad_offset) { allocdirect_merge(oldlisthead, newadp, listadp); listadp = newadp; @@ -5036,6 +9677,7 @@ softdep_fsync(vp) { struct inodedep *inodedep; struct pagedep *pagedep; + struct inoref *inoref; struct worklist *wk; struct diradd *dap; struct mount *mp; @@ -5052,17 +9694,24 @@ softdep_fsync(vp) fs = ip->i_fs; mp = vp->v_mount; ACQUIRE_LOCK(&lk); +restart: if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) { FREE_LOCK(&lk); return 
(0); } + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) + == DEPCOMPLETE) { + jwait(&inoref->if_list); + goto restart; + } + } if (!LIST_EMPTY(&inodedep->id_inowait) || - !LIST_EMPTY(&inodedep->id_bufwait) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || !TAILQ_EMPTY(&inodedep->id_inoupdt) || !TAILQ_EMPTY(&inodedep->id_newinoupdt)) - panic("softdep_fsync: pending ops"); + panic("softdep_fsync: pending ops %p", inodedep); for (error = 0, flushparent = 0; ; ) { if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL) break; @@ -5254,8 +9903,8 @@ int softdep_sync_metadata(struct vnode *vp) { struct pagedep *pagedep; - struct allocdirect *adp; struct allocindir *aip; + struct newblk *newblk; struct buf *bp, *nbp; struct worklist *wk; struct bufobj *bo; @@ -5319,27 +9968,15 @@ loop: switch (wk->wk_type) { case D_ALLOCDIRECT: - adp = WK_ALLOCDIRECT(wk); - if (adp->ad_state & DEPCOMPLETE) - continue; - nbp = adp->ad_buf; - nbp = getdirtybuf(nbp, &lk, waitfor); - if (nbp == NULL) - continue; - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = bwrite(nbp)) != 0) { - break; + case D_ALLOCINDIR: + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk != NULL) { + jwait(&newblk->nb_jnewblk->jn_list); + goto restart; } - ACQUIRE_LOCK(&lk); - continue; - - case D_ALLOCINDIR: - aip = WK_ALLOCINDIR(wk); - if (aip->ai_state & DEPCOMPLETE) + if (newblk->nb_state & DEPCOMPLETE) continue; - nbp = aip->ai_buf; + nbp = newblk->nb_bmsafemap->sm_buf; nbp = getdirtybuf(nbp, &lk, waitfor); if (nbp == NULL) continue; @@ -5355,10 +9992,16 @@ loop: case D_INDIRDEP: restart: - LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) { - if (aip->ai_state & DEPCOMPLETE) + LIST_FOREACH(aip, + &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) { + newblk = (struct newblk *)aip; + if (newblk->nb_jnewblk != NULL) { + jwait(&newblk->nb_jnewblk->jn_list); + goto restart; + } + if (newblk->nb_state & DEPCOMPLETE) continue; - nbp = aip->ai_buf; + nbp = newblk->nb_bmsafemap->sm_buf; nbp = getdirtybuf(nbp, &lk, MNT_WAIT); if (nbp == NULL) goto restart; @@ -5371,14 +10014,6 @@ loop: } continue; - case D_INODEDEP: - if ((error = flush_inodedep_deps(wk->wk_mp, - WK_INODEDEP(wk)->id_ino)) != 0) { - FREE_LOCK(&lk); - break; - } - continue; - case D_PAGEDEP: /* * We are trying to sync a directory that may @@ -5400,48 +10035,6 @@ loop: } continue; - case D_MKDIR: - /* - * This case should never happen if the vnode has - * been properly sync'ed. However, if this function - * is used at a place where the vnode has not yet - * been sync'ed, this dependency can show up. So, - * rather than panic, just flush it. - */ - nbp = WK_MKDIR(wk)->md_buf; - nbp = getdirtybuf(nbp, &lk, waitfor); - if (nbp == NULL) - continue; - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = bwrite(nbp)) != 0) { - break; - } - ACQUIRE_LOCK(&lk); - continue; - - case D_BMSAFEMAP: - /* - * This case should never happen if the vnode has - * been properly sync'ed. However, if this function - * is used at a place where the vnode has not yet - * been sync'ed, this dependency can show up. So, - * rather than panic, just flush it. 
- */ - nbp = WK_BMSAFEMAP(wk)->sm_buf; - nbp = getdirtybuf(nbp, &lk, waitfor); - if (nbp == NULL) - continue; - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = bwrite(nbp)) != 0) { - break; - } - ACQUIRE_LOCK(&lk); - continue; - default: panic("softdep_sync_metadata: Unknown type %s", TYPENAME(wk->wk_type)); @@ -5489,7 +10082,8 @@ loop: BO_LOCK(bo); drain_output(vp); BO_UNLOCK(bo); - return (0); + return ffs_update(vp, 1); + /* return (0); */ } /* @@ -5502,6 +10096,7 @@ flush_inodedep_deps(mp, ino) ino_t ino; { struct inodedep *inodedep; + struct inoref *inoref; int error, waitfor; /* @@ -5522,8 +10117,16 @@ flush_inodedep_deps(mp, ino) return (error); FREE_LOCK(&lk); ACQUIRE_LOCK(&lk); +restart: if (inodedep_lookup(mp, ino, 0, &inodedep) == 0) return (0); + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) + == DEPCOMPLETE) { + jwait(&inoref->if_list); + goto restart; + } + } if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) || flush_deplist(&inodedep->id_newinoupdt, waitfor, &error) || flush_deplist(&inodedep->id_extupdt, waitfor, &error) || @@ -5555,13 +10158,19 @@ flush_deplist(listhead, waitfor, errorp) int *errorp; { struct allocdirect *adp; + struct newblk *newblk; struct buf *bp; mtx_assert(&lk, MA_OWNED); TAILQ_FOREACH(adp, listhead, ad_next) { - if (adp->ad_state & DEPCOMPLETE) + newblk = (struct newblk *)adp; + if (newblk->nb_jnewblk != NULL) { + jwait(&newblk->nb_jnewblk->jn_list); + return (1); + } + if (newblk->nb_state & DEPCOMPLETE) continue; - bp = adp->ad_buf; + bp = newblk->nb_bmsafemap->sm_buf; bp = getdirtybuf(bp, &lk, waitfor); if (bp == NULL) { if (waitfor == MNT_NOWAIT) @@ -5582,6 +10191,100 @@ flush_deplist(listhead, waitfor, errorp) } /* + * Flush dependencies associated with an allocdirect block. + */ +static int +flush_newblk_dep(vp, mp, lbn) + struct vnode *vp; + struct mount *mp; + ufs_lbn_t lbn; +{ + struct newblk *newblk; + struct bufobj *bo; + struct inode *ip; + struct buf *bp; + ufs2_daddr_t blkno; + int error; + + error = 0; + bo = &vp->v_bufobj; + ip = VTOI(vp); + blkno = DIP(ip, i_db[lbn]); + if (blkno == 0) + panic("flush_newblk_dep: Missing block"); + ACQUIRE_LOCK(&lk); + /* + * Loop until all dependencies related to this block are satisfied. + * We must be careful to restart after each sleep in case a write + * completes some part of this process for us. + */ + for (;;) { + if (newblk_lookup(mp, blkno, 0, &newblk) == 0) { + FREE_LOCK(&lk); + break; + } + if (newblk->nb_list.wk_type != D_ALLOCDIRECT) + panic("flush_newblk_deps: Bad newblk %p", newblk); + /* + * Flush the journal. + */ + if (newblk->nb_jnewblk != NULL) { + jwait(&newblk->nb_jnewblk->jn_list); + continue; + } + /* + * Write the bitmap dependency. + */ + if ((newblk->nb_state & DEPCOMPLETE) == 0) { + bp = newblk->nb_bmsafemap->sm_buf; + bp = getdirtybuf(bp, &lk, MNT_WAIT); + if (bp == NULL) + continue; + FREE_LOCK(&lk); + error = bwrite(bp); + if (error) + break; + ACQUIRE_LOCK(&lk); + continue; + } + /* + * Write the buffer. 
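+		 * This is the usual gbincore()/BUF_LOCK() pattern:
+		 * look the buffer up under the bufobj lock, then
+		 * take it with LK_SLEEPFAIL so that ENOLCK means we
+		 * slept and must restart the loop.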
+ */ + FREE_LOCK(&lk); + BO_LOCK(bo); + bp = gbincore(bo, lbn); + if (bp != NULL) { + error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | + LK_INTERLOCK, BO_MTX(bo)); + if (error == ENOLCK) { + ACQUIRE_LOCK(&lk); + continue; /* Slept, retry */ + } + if (error != 0) + break; /* Failed */ + if (bp->b_flags & B_DELWRI) { + bremfree(bp); + error = bwrite(bp); + if (error) + break; + } else + BUF_UNLOCK(bp); + } else + BO_UNLOCK(bo); + /* + * We have to wait for the direct pointers to + * point at the newdirblk before the dependency + * will go away. + */ + error = ffs_update(vp, MNT_WAIT); + if (error) + break; + ACQUIRE_LOCK(&lk); + } + return (error); +} + +/* * Eliminate a pagedep dependency by flushing out all its diradd dependencies. * Called with splbio blocked. */ @@ -5592,16 +10295,16 @@ flush_pagedep_deps(pvp, mp, diraddhdp) struct diraddhd *diraddhdp; { struct inodedep *inodedep; + struct inoref *inoref; struct ufsmount *ump; struct diradd *dap; struct vnode *vp; - struct bufobj *bo; int error = 0; struct buf *bp; ino_t inum; - struct worklist *wk; ump = VFSTOUFS(mp); +restart: while ((dap = LIST_FIRST(diraddhdp)) != NULL) { /* * Flush ourselves if this directory entry @@ -5609,7 +10312,7 @@ flush_pagedep_deps(pvp, mp, diraddhdp) */ if (dap->da_state & MKDIR_PARENT) { FREE_LOCK(&lk); - if ((error = ffs_update(pvp, 1)) != 0) + if ((error = ffs_update(pvp, MNT_WAIT)) != 0) break; ACQUIRE_LOCK(&lk); /* @@ -5623,84 +10326,51 @@ flush_pagedep_deps(pvp, mp, diraddhdp) /* * A newly allocated directory must have its "." and * ".." entries written out before its name can be - * committed in its parent. We do not want or need - * the full semantics of a synchronous ffs_syncvnode as - * that may end up here again, once for each directory - * level in the filesystem. Instead, we push the blocks - * and wait for them to clear. We have to fsync twice - * because the first call may choose to defer blocks - * that still have dependencies, but deferral will - * happen at most once. + * committed in its parent. */ inum = dap->da_newinum; + if (inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep) == 0) + panic("flush_pagedep_deps: lost inode1"); + /* + * Wait for any pending journal adds to complete so we don't + * cause rollbacks while syncing. + */ + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) + == DEPCOMPLETE) { + jwait(&inoref->if_list); + goto restart; + } + } if (dap->da_state & MKDIR_BODY) { FREE_LOCK(&lk); if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ))) break; - if ((error=ffs_syncvnode(vp, MNT_NOWAIT)) || - (error=ffs_syncvnode(vp, MNT_NOWAIT))) { - vput(vp); - break; - } - bo = &vp->v_bufobj; - BO_LOCK(bo); - drain_output(vp); + error = flush_newblk_dep(vp, mp, 0); /* - * If first block is still dirty with a D_MKDIR - * dependency then it needs to be written now. + * If we still have the dependency we might need to + * update the vnode to sync the new link count to + * disk. 
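
flush_newblk_dep() above loops until every dependency on the block is gone, and after any step that may sleep (jwait, getdirtybuf, a BUF_LOCK that returns ENOLCK under LK_SLEEPFAIL) it re-enters the loop instead of trusting stale state. A compressed sketch of that retry discipline; next_obstacle() and the write helpers are stubs standing in for lock-protected kernel state, not real interfaces:

#include <errno.h>

enum obstacle { JOURNAL, BITMAP, DATA, DONE };

/* Stubs: the real code re-inspects the dependency under the softdep lock. */
static enum obstacle next_obstacle(void) { return (DONE); }
static void wait_journal(void) { }
static int write_bitmap(void) { return (0); }
static int write_data(void) { return (0); }

static int
flush_block_deps(void)
{
        int error;

        for (;;) {
                switch (next_obstacle()) {
                case JOURNAL:
                        wait_journal();         /* slept; state is stale */
                        continue;
                case BITMAP:
                        if ((error = write_bitmap()) != 0)
                                return (error);
                        continue;               /* re-evaluate from scratch */
                case DATA:
                        error = write_data();
                        if (error == EAGAIN)    /* like ENOLCK: we slept */
                                continue;
                        if (error != 0)
                                return (error);
                        continue;
                case DONE:
                        return (0);
                }
        }
}
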
*/ - for (;;) { - error = 0; - bp = gbincore(bo, 0); - if (bp == NULL) - break; /* First block not present */ - error = BUF_LOCK(bp, - LK_EXCLUSIVE | - LK_SLEEPFAIL | - LK_INTERLOCK, - BO_MTX(bo)); - BO_LOCK(bo); - if (error == ENOLCK) - continue; /* Slept, retry */ - if (error != 0) - break; /* Failed */ - if ((bp->b_flags & B_DELWRI) == 0) { - BUF_UNLOCK(bp); - break; /* Buffer not dirty */ - } - for (wk = LIST_FIRST(&bp->b_dep); - wk != NULL; - wk = LIST_NEXT(wk, wk_list)) - if (wk->wk_type == D_MKDIR) - break; - if (wk == NULL) - BUF_UNLOCK(bp); /* Dependency gone */ - else { - /* - * D_MKDIR dependency remains, - * must write buffer to stable - * storage. - */ - BO_UNLOCK(bo); - bremfree(bp); - error = bwrite(bp); - BO_LOCK(bo); - } - break; - } - BO_UNLOCK(bo); + if (error == 0 && dap == LIST_FIRST(diraddhdp)) + error = ffs_update(vp, MNT_WAIT); vput(vp); if (error != 0) - break; /* Flushing of first block failed */ + break; ACQUIRE_LOCK(&lk); /* * If that cleared dependencies, go on to next. */ if (dap != LIST_FIRST(diraddhdp)) continue; - if (dap->da_state & MKDIR_BODY) - panic("flush_pagedep_deps: MKDIR_BODY"); + if (dap->da_state & MKDIR_BODY) { + inodedep_lookup(UFSTOVFS(ump), inum, 0, + &inodedep); + panic("flush_pagedep_deps: MKDIR_BODY " + "inodedep %p dap %p vp %p", + inodedep, dap, vp); + } } /* * Flush the inode on which the directory entry depends. @@ -5719,8 +10389,8 @@ retry: * If the inode still has bitmap dependencies, * push them to disk. */ - if ((inodedep->id_state & DEPCOMPLETE) == 0) { - bp = inodedep->id_buf; + if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) == 0) { + bp = inodedep->id_bmsafemap->sm_buf; bp = getdirtybuf(bp, &lk, MNT_WAIT); if (bp == NULL) goto retry; @@ -5733,24 +10403,29 @@ retry: } /* * If the inode is still sitting in a buffer waiting - * to be written, push it to disk. + * to be written or waiting for the link count to be + * adjusted update it here to flush it to disk. */ - FREE_LOCK(&lk); - if ((error = bread(ump->um_devvp, - fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)), - (int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0) { - brelse(bp); - break; + if (dap == LIST_FIRST(diraddhdp)) { + FREE_LOCK(&lk); + if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, + FFSV_FORCEINSMQ))) + break; + error = ffs_update(vp, MNT_WAIT); + vput(vp); + if (error) + break; + ACQUIRE_LOCK(&lk); } - if ((error = bwrite(bp)) != 0) - break; - ACQUIRE_LOCK(&lk); /* * If we have failed to get rid of all the dependencies * then something is seriously wrong. */ - if (dap == LIST_FIRST(diraddhdp)) - panic("flush_pagedep_deps: flush failed"); + if (dap == LIST_FIRST(diraddhdp)) { + inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep); + panic("flush_pagedep_deps: failed to flush " + "inodedep %p ino %d dap %p", inodedep, inum, dap); + } } if (error) ACQUIRE_LOCK(&lk); @@ -6100,10 +10775,13 @@ softdep_count_dependencies(bp, wantcount) int wantcount; { struct worklist *wk; + struct bmsafemap *bmsafemap; struct inodedep *inodedep; struct indirdep *indirdep; + struct freeblks *freeblks; struct allocindir *aip; struct pagedep *pagedep; + struct dirrem *dirrem; struct diradd *dap; int i, retval; @@ -6132,6 +10810,12 @@ softdep_count_dependencies(bp, wantcount) if (!wantcount) goto out; } + if (TAILQ_FIRST(&inodedep->id_inoreflst)) { + /* Add reference dependency. 
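
The rewritten flush_pagedep_deps() above enforces the mkdir ordering in three steps: write the parent's inode for the new link count (MKDIR_PARENT), flush the new directory's first block through flush_newblk_dep() (MKDIR_BODY), and only then allow the name to be committed in the parent's directory block. A skeletal restatement under hypothetical helper names; the stubs mark where the real calls (ffs_update(pvp, MNT_WAIT), flush_newblk_dep(vp, mp, 0)) would go:

static int write_parent_inode(void) { return (0); }    /* MKDIR_PARENT */
static int write_child_dotblock(void) { return (0); }  /* MKDIR_BODY */
static int commit_name_in_parent(void) { return (0); }

static int
commit_mkdir(void)
{
        int error;

        if ((error = write_parent_inode()) != 0)
                return (error);
        if ((error = write_child_dotblock()) != 0)
                return (error);
        return (commit_name_in_parent());
}
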
*/ + retval += 1; + if (!wantcount) + goto out; + } continue; case D_INDIRDEP: @@ -6147,6 +10831,14 @@ softdep_count_dependencies(bp, wantcount) case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); + LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) { + if (LIST_FIRST(&dirrem->dm_jremrefhd)) { + /* Journal remove ref dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + } for (i = 0; i < DAHASHSZ; i++) { LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { @@ -6159,14 +10851,44 @@ softdep_count_dependencies(bp, wantcount) continue; case D_BMSAFEMAP: + bmsafemap = WK_BMSAFEMAP(wk); + if (LIST_FIRST(&bmsafemap->sm_jaddrefhd)) { + /* Add reference dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd)) { + /* Allocate block dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_FREEBLKS: + freeblks = WK_FREEBLKS(wk); + if (LIST_FIRST(&freeblks->fb_jfreeblkhd)) { + /* Freeblk journal dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_FREEWORK: + case D_FREEDEP: + case D_JSEGDEP: case D_ALLOCDIRECT: case D_ALLOCINDIR: case D_MKDIR: + case D_JSEG: + case D_SBDEP: /* never a dependency on these blocks */ continue; default: - panic("softdep_check_for_rollback: Unexpected type %s", + panic("softdep_count_dependencies: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } @@ -6382,6 +11104,45 @@ softdep_error(func, error) #ifdef DDB +static void +inodedep_print(struct inodedep *inodedep, int verbose) +{ + db_printf("%p fs %p st %x ino %jd inoblk %jd delta %d nlink %d" + " saveino %p\n", + inodedep, inodedep->id_fs, inodedep->id_state, + (intmax_t)inodedep->id_ino, + (intmax_t)fsbtodb(inodedep->id_fs, + ino_to_fsba(inodedep->id_fs, inodedep->id_ino)), + inodedep->id_nlinkdelta, inodedep->id_savednlink, + inodedep->id_savedino1); + + if (verbose == 0) + return; + + db_printf("\tpendinghd %p, bufwait %p, inowait %p, inoreflst %p, " + "mkdiradd %p\n", + LIST_FIRST(&inodedep->id_pendinghd), + LIST_FIRST(&inodedep->id_bufwait), + LIST_FIRST(&inodedep->id_inowait), + TAILQ_FIRST(&inodedep->id_inoreflst), + inodedep->id_mkdiradd); + db_printf("\tinoupdt %p, newinoupdt %p, extupdt %p, newextupdt %p\n", + TAILQ_FIRST(&inodedep->id_inoupdt), + TAILQ_FIRST(&inodedep->id_newinoupdt), + TAILQ_FIRST(&inodedep->id_extupdt), + TAILQ_FIRST(&inodedep->id_newextupdt)); +} + +DB_SHOW_COMMAND(inodedep, db_show_inodedep) +{ + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + inodedep_print((struct inodedep*)addr, 1); +} + DB_SHOW_COMMAND(inodedeps, db_show_inodedeps) { struct inodedep_hashhead *inodedephd; @@ -6395,15 +11156,62 @@ DB_SHOW_COMMAND(inodedeps, db_show_inodedeps) LIST_FOREACH(inodedep, inodedephd, id_hash) { if (fs != NULL && fs != inodedep->id_fs) continue; - db_printf("%p fs %p st %x ino %jd inoblk %jd\n", - inodedep, inodedep->id_fs, inodedep->id_state, - (intmax_t)inodedep->id_ino, - (intmax_t)fsbtodb(inodedep->id_fs, - ino_to_fsba(inodedep->id_fs, inodedep->id_ino))); + inodedep_print(inodedep, 0); } } } +DB_SHOW_COMMAND(worklist, db_show_worklist) +{ + struct worklist *wk; + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + wk = (struct worklist *)addr; + printf("worklist: %p type %s state 0x%X\n", + wk, TYPENAME(wk->wk_type), wk->wk_state); +} + +DB_SHOW_COMMAND(workhead, db_show_workhead) +{ + struct workhead *wkhd; + struct worklist *wk; + int i; + + if (have_addr == 0) { + db_printf("Address required\n"); + return; 
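
A detail worth noting in db_show_workhead(), whose loop follows in the next hunk: the traversal is capped at 100 entries so a corrupted or cyclic worklist cannot wedge the debugger. The same defensive walk as a standalone toy (dump_bounded and struct item are invented names):

#include <stdio.h>
#include <sys/queue.h>

struct item { LIST_ENTRY(item) next; };
LIST_HEAD(itemhead, item);

static void
dump_bounded(struct itemhead *head)
{
        struct item *it;
        int i;

        it = LIST_FIRST(head);
        for (i = 0; i < 100 && it != NULL; i++, it = LIST_NEXT(it, next))
                printf("item %p\n", (void *)it);
        if (i == 100)
                printf("...truncated, list may be corrupt\n");
}
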
+ } + wkhd = (struct workhead *)addr; + wk = LIST_FIRST(wkhd); + for (i = 0; i < 100 && wk != NULL; i++, wk = LIST_NEXT(wk, wk_list)) + db_printf("worklist: %p type %s state 0x%X", + wk, TYPENAME(wk->wk_type), wk->wk_state); + if (i == 100) + db_printf("workhead overflow"); + printf("\n"); +} + + +DB_SHOW_COMMAND(mkdirs, db_show_mkdirs) +{ + struct jaddref *jaddref; + struct diradd *diradd; + struct mkdir *mkdir; + + LIST_FOREACH(mkdir, &mkdirlisthd, md_mkdirs) { + diradd = mkdir->md_diradd; + db_printf("mkdir: %p state 0x%X dap %p state 0x%X", + mkdir, mkdir->md_state, diradd, diradd->da_state); + if ((jaddref = mkdir->md_jaddref) != NULL) + db_printf(" jaddref %p jaddref state 0x%X", + jaddref, jaddref->ja_state); + db_printf("\n"); + } +} + #endif /* DDB */ #endif /* SOFTUPDATES */ Index: /usr/src/sys/ufs/ffs/ffs_vnops.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_vnops.c (revision 202614) +++ /usr/src/sys/ufs/ffs/ffs_vnops.c (working copy) @@ -225,6 +225,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor) wait = (waitfor == MNT_WAIT); lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); bo = &vp->v_bufobj; + ip->i_flag &= ~IN_NEEDSYNC; /* * Flush all dirty buffers associated with a vnode. Index: /usr/src/sys/ufs/ffs/ffs_alloc.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_alloc.c (revision 202614) +++ /usr/src/sys/ufs/ffs/ffs_alloc.c (working copy) @@ -94,23 +94,23 @@ __FBSDID("$FreeBSD$"); #include typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref, - int size); + int size, int rsize); -static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int); +static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int, int); static ufs2_daddr_t - ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t); + ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t, int); #ifdef INVARIANTS static int ffs_checkblk(struct inode *, ufs2_daddr_t, long); #endif -static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int); -static void ffs_clusteracct(struct ufsmount *, struct fs *, struct cg *, - ufs1_daddr_t, int); +static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int, + int); static ino_t ffs_dirpref(struct inode *); static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int); static void ffs_fserr(struct fs *, ino_t, char *); static ufs2_daddr_t ffs_hashalloc - (struct inode *, int, ufs2_daddr_t, int, allocfcn_t *); -static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int); + (struct inode *, int, ufs2_daddr_t, int, int, allocfcn_t *); +static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int, + int); static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int); static int ffs_reallocblks_ufs1(struct vop_reallocblks_args *); static int ffs_reallocblks_ufs2(struct vop_reallocblks_args *); @@ -187,7 +187,7 @@ retry: cg = ino_to_cg(fs, ip->i_number); else cg = dtog(fs, bpref); - bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg); + bno = ffs_hashalloc(ip, cg, bpref, size, size, ffs_alloccg); if (bno > 0) { delta = btodb(size); if (ip->i_flag & IN_SPACECOUNTED) { @@ -385,16 +385,12 @@ retry: panic("ffs_realloccg: bad optim"); /* NOTREACHED */ } - bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg); + bno = ffs_hashalloc(ip, cg, bpref, request, nsize, ffs_alloccg); if (bno > 0) { bp->b_blkno = fsbtodb(fs, bno); if 
(!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize, - ip->i_number); - if (nsize < request) - ffs_blkfree(ump, fs, ip->i_devvp, - bno + numfrags(fs, nsize), - (long)(request - nsize), ip->i_number); + ip->i_number, NULL); delta = btodb(nsize - osize); if (ip->i_flag & IN_SPACECOUNTED) { UFS_LOCK(ump); @@ -483,6 +479,14 @@ ffs_reallocblks(ap) if (doreallocblks == 0) return (ENOSPC); + /* + * We can't wait in softdep prealloc as it may fsync and recurse + * here. Instead we simply fail to reallocate blocks if this + * rare condition arises. + */ + if (DOINGSOFTDEP(ap->a_vp)) + if (softdep_prealloc(ap->a_vp, MNT_NOWAIT) != 0) + return (ENOSPC); if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1) return (ffs_reallocblks_ufs1(ap)); return (ffs_reallocblks_ufs2(ap)); @@ -583,7 +587,7 @@ ffs_reallocblks_ufs1(ap) * Search the block map looking for an allocation of the desired size. */ if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, - len, ffs_clusteralloc)) == 0) { + len, len, ffs_clusteralloc)) == 0) { UFS_UNLOCK(ump); goto fail; } @@ -669,7 +673,7 @@ ffs_reallocblks_ufs1(ap) if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number); + fs->fs_bsize, ip->i_number, NULL); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -791,7 +795,7 @@ ffs_reallocblks_ufs2(ap) * Search the block map looking for an allocation of the desired size. */ if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, - len, ffs_clusteralloc)) == 0) { + len, len, ffs_clusteralloc)) == 0) { UFS_UNLOCK(ump); goto fail; } @@ -877,7 +881,7 @@ ffs_reallocblks_ufs2(ap) if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number); + fs->fs_bsize, ip->i_number, NULL); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -964,7 +968,7 @@ ffs_valloc(pvp, mode, cred, vpp) if (fs->fs_contigdirs[cg] > 0) fs->fs_contigdirs[cg]--; } - ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, + ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, (allocfcn_t *)ffs_nodealloccg); if (ino == 0) goto noinodes; @@ -1273,11 +1277,12 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap) */ /*VARARGS5*/ static ufs2_daddr_t -ffs_hashalloc(ip, cg, pref, size, allocator) +ffs_hashalloc(ip, cg, pref, size, rsize, allocator) struct inode *ip; int cg; ufs2_daddr_t pref; - int size; /* size for data blocks, mode for inodes */ + int size; /* Search size for data blocks, mode for inodes */ + int rsize; /* Real allocated size. 
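
The ffs_reallocblks() guard above breaks a recursion: softdep_prealloc() may fsync, and fsync may reallocate blocks, so under softdep the code prefers failing the (optional) reallocation with ENOSPC over sleeping. The shape of that guard with a stand-in try_prealloc(); the constant 1 models MNT_NOWAIT:

#include <errno.h>

static int try_prealloc(int nowait) { (void)nowait; return (0); }  /* stub */

static int
reallocate_cluster(void)
{
        if (try_prealloc(1) != 0)       /* 1 models MNT_NOWAIT: never sleep */
                return (ENOSPC);        /* rare; caller keeps the old layout */
        /* ... proceed with the block reallocation ... */
        return (0);
}
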
*/ allocfcn_t *allocator; { struct fs *fs; @@ -1293,7 +1298,7 @@ static ufs2_daddr_t /* * 1: preferred cylinder group */ - result = (*allocator)(ip, cg, pref, size); + result = (*allocator)(ip, cg, pref, size, rsize); if (result) return (result); /* @@ -1303,7 +1308,7 @@ static ufs2_daddr_t cg += i; if (cg >= fs->fs_ncg) cg -= fs->fs_ncg; - result = (*allocator)(ip, cg, 0, size); + result = (*allocator)(ip, cg, 0, size, rsize); if (result) return (result); } @@ -1314,7 +1319,7 @@ static ufs2_daddr_t */ cg = (icg + 2) % fs->fs_ncg; for (i = 2; i < fs->fs_ncg; i++) { - result = (*allocator)(ip, cg, 0, size); + result = (*allocator)(ip, cg, 0, size, rsize); if (result) return (result); cg++; @@ -1396,7 +1401,8 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev); + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev, + frags, numfrags(fs, osize)); bdwrite(bp); return (bprev); @@ -1414,11 +1420,12 @@ fail: * and if it is, allocate it. */ static ufs2_daddr_t -ffs_alloccg(ip, cg, bpref, size) +ffs_alloccg(ip, cg, bpref, size, rsize) struct inode *ip; int cg; ufs2_daddr_t bpref; int size; + int rsize; { struct fs *fs; struct cg *cgp; @@ -1446,7 +1453,7 @@ static ufs2_daddr_t cgp->cg_old_time = cgp->cg_time = time_second; if (size == fs->fs_bsize) { UFS_LOCK(ump); - blkno = ffs_alloccgblk(ip, bp, bpref); + blkno = ffs_alloccgblk(ip, bp, bpref, rsize); ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); bdwrite(bp); @@ -1470,21 +1477,14 @@ static ufs2_daddr_t if (cgp->cg_cs.cs_nbfree == 0) goto fail; UFS_LOCK(ump); - blkno = ffs_alloccgblk(ip, bp, bpref); - bno = dtogd(fs, blkno); - for (i = frags; i < fs->fs_frag; i++) - setbit(blksfree, bno + i); - i = fs->fs_frag - frags; - cgp->cg_cs.cs_nffree += i; - fs->fs_cstotal.cs_nffree += i; - fs->fs_cs(fs, cg).cs_nffree += i; - fs->fs_fmod = 1; - cgp->cg_frsum[i]++; + blkno = ffs_alloccgblk(ip, bp, bpref, rsize); ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); bdwrite(bp); return (blkno); } + KASSERT(size == rsize, + ("ffs_alloccg: size(%d) != rsize(%d)", size, rsize)); bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); if (bno < 0) goto fail; @@ -1502,7 +1502,7 @@ static ufs2_daddr_t ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno); + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, frags, 0); bdwrite(bp); return (blkno); @@ -1524,10 +1524,11 @@ fail: * blocks may be fragmented by the routine that allocates them. */ static ufs2_daddr_t -ffs_alloccgblk(ip, bp, bpref) +ffs_alloccgblk(ip, bp, bpref, size) struct inode *ip; struct buf *bp; ufs2_daddr_t bpref; + int size; { struct fs *fs; struct cg *cgp; @@ -1535,6 +1536,7 @@ static ufs2_daddr_t ufs1_daddr_t bno; ufs2_daddr_t blkno; u_int8_t *blksfree; + int i; fs = ip->i_fs; ump = ip->i_ump; @@ -1562,16 +1564,32 @@ static ufs2_daddr_t gotit: blkno = fragstoblks(fs, bno); ffs_clrblock(fs, blksfree, (long)blkno); - ffs_clusteracct(ump, fs, cgp, blkno, -1); + ffs_clusteracct(fs, cgp, blkno, -1); cgp->cg_cs.cs_nbfree--; fs->fs_cstotal.cs_nbfree--; fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; fs->fs_fmod = 1; blkno = cgbase(fs, cgp->cg_cgx) + bno; + /* + * If the caller didn't want the whole block free the frags here. 
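
The rsize parameter threaded through ffs_hashalloc() and ffs_alloccg() above separates the size searched for from the size actually consumed; that is how ffs_realloccg() can hunt for a full `request` run while claiming only `nsize`, and the KASSERT pins the fragment path to size == rsize. A toy allocator showing the split (find_run() and mark_used() are stand-ins, not FFS functions):

/* Search with `size`, consume only `rsize`; the tail of the run stays free. */
static long find_run(int size) { (void)size; return (-1); }     /* stub */
static void mark_used(long bno, int rsize) { (void)bno; (void)rsize; }

static long
alloc_frags(int size, int rsize)
{
        long bno;

        if ((bno = find_run(size)) < 0)
                return (-1);
        mark_used(bno, rsize);
        return (bno);
}
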
+ */ + size = numfrags(fs, size); + if (size != fs->fs_frag) { + bno = dtogd(fs, blkno); + for (i = size; i < fs->fs_frag; i++) + setbit(blksfree, bno + i); + i = fs->fs_frag - size; + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + fs->fs_cs(fs, cgp->cg_cgx).cs_nffree += i; + fs->fs_fmod = 1; + cgp->cg_frsum[i]++; + } /* XXX Fixme. */ UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno); + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, + size, 0); UFS_LOCK(ump); return (blkno); } @@ -1584,11 +1602,12 @@ gotit: * take the first one that we find following bpref. */ static ufs2_daddr_t -ffs_clusteralloc(ip, cg, bpref, len) +ffs_clusteralloc(ip, cg, bpref, len, unused) struct inode *ip; int cg; ufs2_daddr_t bpref; int len; + int unused; { struct fs *fs; struct cg *cgp; @@ -1684,7 +1703,7 @@ static ufs2_daddr_t len = blkstofrags(fs, len); UFS_LOCK(ump); for (i = 0; i < len; i += fs->fs_frag) - if (ffs_alloccgblk(ip, bp, bno + i) != bno + i) + if (ffs_alloccgblk(ip, bp, bno + i, fs->fs_bsize) != bno + i) panic("ffs_clusteralloc: lost block"); ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); @@ -1708,11 +1727,12 @@ fail: * inode in the specified cylinder group. */ static ufs2_daddr_t -ffs_nodealloccg(ip, cg, ipref, mode) +ffs_nodealloccg(ip, cg, ipref, mode, unused) struct inode *ip; int cg; ufs2_daddr_t ipref; int mode; + int unused; { struct fs *fs; struct cg *cgp; @@ -1815,28 +1835,6 @@ gotit: } /* - * check if a block is free - */ -static int -ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) -{ - - switch ((int)fs->fs_frag) { - case 8: - return (cp[h] == 0); - case 4: - return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); - case 2: - return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); - case 1: - return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); - default: - panic("ffs_isfreeblock"); - } - return (0); -} - -/* * Free a block or fragment. * * The specified block or fragment is placed back in the @@ -1844,13 +1842,14 @@ gotit: * block reassembly is checked. */ void -ffs_blkfree(ump, fs, devvp, bno, size, inum) +ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd) struct ufsmount *ump; struct fs *fs; struct vnode *devvp; ufs2_daddr_t bno; long size; ino_t inum; + struct workhead *dephd; { struct cg *cgp; struct buf *bp; @@ -1917,7 +1916,7 @@ void panic("ffs_blkfree: freeing free block"); } ffs_setblock(fs, blksfree, fragno); - ffs_clusteracct(ump, fs, cgp, fragno, 1); + ffs_clusteracct(fs, cgp, fragno, 1); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; @@ -1957,7 +1956,7 @@ void cgp->cg_cs.cs_nffree -= fs->fs_frag; fs->fs_cstotal.cs_nffree -= fs->fs_frag; fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; - ffs_clusteracct(ump, fs, cgp, fragno, 1); + ffs_clusteracct(fs, cgp, fragno, 1); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; @@ -1966,6 +1965,9 @@ void fs->fs_fmod = 1; ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); + if (UFSTOVFS(ump)->mnt_flag & MNT_SOFTDEP) + softdep_setup_blkfree(UFSTOVFS(ump), bp, bno, + numfrags(fs, size), dephd); bdwrite(bp); } @@ -2036,7 +2038,8 @@ ffs_vfree(pvp, ino, mode) return (0); } ip = VTOI(pvp); - return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode)); + return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode, + NULL)); } /* @@ -2044,12 +2047,13 @@ ffs_vfree(pvp, ino, mode) * The specified inode is placed back in the free map. 
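
The tail-freeing block just added to ffs_alloccgblk() is easy to check with small numbers: with eight fragments per block and a three-fragment rsize, fragments 3..7 go back to the free map, cs_nffree grows by five, and cg_frsum[5] gains one run. A self-contained model of that accounting, assuming a byte-per-fragment mask instead of the real bitmap:

#include <assert.h>
#include <string.h>

#define FRAG_PER_BLK 8

static void
alloc_partial(unsigned char *freemask, int *frsum, int *nffree, int rfrags)
{
        int i, tail;

        memset(freemask, 0, FRAG_PER_BLK);      /* whole block in use */
        for (i = rfrags; i < FRAG_PER_BLK; i++)
                freemask[i] = 1;                /* free the tail frags */
        tail = FRAG_PER_BLK - rfrags;
        *nffree += tail;
        frsum[tail]++;                          /* one run of `tail` frags */
}

int
main(void)
{
        unsigned char mask[FRAG_PER_BLK];
        int frsum[FRAG_PER_BLK + 1] = { 0 }, nffree = 0;

        alloc_partial(mask, frsum, &nffree, 3);
        assert(nffree == 5 && frsum[5] == 1);
        assert(mask[2] == 0 && mask[3] == 1);   /* frag 3 is first freed */
        return (0);
}
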
*/ int -ffs_freefile(ump, fs, devvp, ino, mode) +ffs_freefile(ump, fs, devvp, ino, mode, wkhd) struct ufsmount *ump; struct fs *fs; struct vnode *devvp; ino_t ino; int mode; + struct workhead *wkhd; { struct cg *cgp; struct buf *bp; @@ -2105,6 +2109,9 @@ int fs->fs_fmod = 1; ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); + if (UFSTOVFS(ump)->mnt_flag & MNT_SOFTDEP) + softdep_setup_inofree(UFSTOVFS(ump), bp, + ino + cg * fs->fs_ipg, wkhd); bdwrite(bp); return (0); } @@ -2218,101 +2225,6 @@ ffs_mapsearch(fs, cgp, bpref, allocsiz) } /* - * Update the cluster map because of an allocation or free. - * - * Cnt == 1 means free; cnt == -1 means allocating. - */ -void -ffs_clusteracct(ump, fs, cgp, blkno, cnt) - struct ufsmount *ump; - struct fs *fs; - struct cg *cgp; - ufs1_daddr_t blkno; - int cnt; -{ - int32_t *sump; - int32_t *lp; - u_char *freemapp, *mapp; - int i, start, end, forw, back, map, bit; - - mtx_assert(UFS_MTX(ump), MA_OWNED); - - if (fs->fs_contigsumsize <= 0) - return; - freemapp = cg_clustersfree(cgp); - sump = cg_clustersum(cgp); - /* - * Allocate or clear the actual block. - */ - if (cnt > 0) - setbit(freemapp, blkno); - else - clrbit(freemapp, blkno); - /* - * Find the size of the cluster going forward. - */ - start = blkno + 1; - end = start + fs->fs_contigsumsize; - if (end >= cgp->cg_nclusterblks) - end = cgp->cg_nclusterblks; - mapp = &freemapp[start / NBBY]; - map = *mapp++; - bit = 1 << (start % NBBY); - for (i = start; i < end; i++) { - if ((map & bit) == 0) - break; - if ((i & (NBBY - 1)) != (NBBY - 1)) { - bit <<= 1; - } else { - map = *mapp++; - bit = 1; - } - } - forw = i - start; - /* - * Find the size of the cluster going backward. - */ - start = blkno - 1; - end = start - fs->fs_contigsumsize; - if (end < 0) - end = -1; - mapp = &freemapp[start / NBBY]; - map = *mapp--; - bit = 1 << (start % NBBY); - for (i = start; i > end; i--) { - if ((map & bit) == 0) - break; - if ((i & (NBBY - 1)) != 0) { - bit >>= 1; - } else { - map = *mapp--; - bit = 1 << (NBBY - 1); - } - } - back = start - i; - /* - * Account for old cluster and the possibly new forward and - * back clusters. - */ - i = back + forw + 1; - if (i > fs->fs_contigsumsize) - i = fs->fs_contigsumsize; - sump[i] += cnt; - if (back > 0) - sump[back] -= cnt; - if (forw > 0) - sump[forw] -= cnt; - /* - * Update cluster summary information. - */ - lp = &sump[fs->fs_contigsumsize]; - for (i = fs->fs_contigsumsize; i > 0; i--) - if (*lp-- > 0) - break; - fs->fs_maxcluster[cgp->cg_cgx] = i; -} - -/* * Fserr prints the name of a filesystem with an error diagnostic. 
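
ffs_blkfree() and ffs_freefile() above now take an optional struct workhead so SUJ can hand journal work to the cylinder-group buffer at free time; a NULL argument, which is what the call sites visible in this patch pass, keeps the old unjournaled behaviour. The pattern in miniature, with toy types only:

#include <stddef.h>
#include <sys/queue.h>

struct workitem { LIST_ENTRY(workitem) w_next; };
LIST_HEAD(workhead_toy, workitem);

static void
blkfree_toy(long blkno, struct workhead_toy *dephd)
{
        (void)blkno;
        /* ... clear the bits for blkno in the cg free map ... */
        if (dephd != NULL) {
                /* journaled path: attach items to the cg buffer's deps */
        }
        /* dephd == NULL: legacy path, nothing extra to record */
}
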
* * The form of the error message is: @@ -2532,7 +2444,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) #endif /* DEBUG */ while (cmd.size > 0) { if ((error = ffs_freefile(ump, fs, ump->um_devvp, - cmd.value, filetype))) + cmd.value, filetype, NULL))) break; cmd.size -= 1; cmd.value += 1; @@ -2560,7 +2472,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) if (blksize > blkcnt) blksize = blkcnt; ffs_blkfree(ump, fs, ump->um_devvp, blkno, - blksize * fs->fs_fsize, ROOTINO); + blksize * fs->fs_fsize, ROOTINO, NULL); blkno += blksize; blkcnt -= blksize; blksize = fs->fs_frag; Index: /usr/src/sys/ufs/ffs/ffs_extern.h =================================================================== --- /usr/src/sys/ufs/ffs/ffs_extern.h (revision 202614) +++ /usr/src/sys/ufs/ffs/ffs_extern.h (working copy) @@ -47,6 +47,7 @@ struct ucred; struct vnode; struct vop_fsync_args; struct vop_reallocblks_args; +struct workhead; int ffs_alloc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, int, int, struct ucred *, ufs2_daddr_t *); @@ -56,20 +57,23 @@ int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_st struct ucred *a_cred, int a_flags, struct buf **a_bpp); int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **); void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *, - ufs2_daddr_t, long, ino_t); + ufs2_daddr_t, long, ino_t, struct workhead *); ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *); ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *); int ffs_checkfreefile(struct fs *, struct vnode *, ino_t); void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); +void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int); void ffs_bdflush(struct bufobj *, struct buf *); int ffs_copyonwrite(struct vnode *, struct buf *); int ffs_flushfiles(struct mount *, int, struct thread *); void ffs_fragacct(struct fs *, int, int32_t [], int); int ffs_freefile(struct ufsmount *, struct fs *, struct vnode *, ino_t, - int); + int, struct workhead *); int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); +int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t); int ffs_mountroot(void); +void ffs_oldfscompat_write(struct fs *, struct ufsmount *); int ffs_reallocblks(struct vop_reallocblks_args *); int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t, ufs2_daddr_t, int, int, int, struct ucred *, struct buf **); @@ -108,7 +112,8 @@ void softdep_initialize(void); void softdep_uninitialize(void); int softdep_mount(struct vnode *, struct mount *, struct fs *, struct ucred *); -void softdep_move_dependencies(struct buf *, struct buf *); +void softdep_unmount(struct mount *); +int softdep_move_dependencies(struct buf *, struct buf *); int softdep_flushworklist(struct mount *, int *, struct thread *); int softdep_flushfiles(struct mount *, int, struct thread *); void softdep_update_inodeblock(struct inode *, struct buf *, int); @@ -117,7 +122,8 @@ void softdep_freefile(struct vnode *, ino_t, int); int softdep_request_cleanup(struct fs *, struct vnode *); void softdep_setup_freeblocks(struct inode *, off_t, int); void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t); -void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t); +void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t, + int, int); void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t, ufs2_daddr_t, long, long, struct buf *); void softdep_setup_allocext(struct inode *, ufs_lbn_t, 
ufs2_daddr_t, @@ -126,11 +132,17 @@ void softdep_setup_allocindir_meta(struct buf *, s struct buf *, int, ufs2_daddr_t); void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t, struct buf *, int, ufs2_daddr_t, ufs2_daddr_t, struct buf *); +void softdep_setup_blkfree(struct mount *, struct buf *, ufs2_daddr_t, int, + struct workhead *); +void softdep_setup_inofree(struct mount *, struct buf *, ino_t, + struct workhead *); +void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *); void softdep_fsync_mountdev(struct vnode *); int softdep_sync_metadata(struct vnode *); int softdep_process_worklist(struct mount *, int); int softdep_fsync(struct vnode *); int softdep_waitidle(struct mount *); +int softdep_prealloc(struct vnode *, int); int ffs_rdonly(struct inode *); Index: /usr/src/sys/ufs/ffs/ffs_subr.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_subr.c (revision 202614) +++ /usr/src/sys/ufs/ffs/ffs_subr.c (working copy) @@ -37,7 +37,6 @@ __FBSDID("$FreeBSD$"); #ifndef _KERNEL #include #include -#include "fsck.h" #else #include #include @@ -223,12 +222,43 @@ ffs_isblock(fs, cp, h) mask = 0x01 << (h & 0x7); return ((cp[h >> 3] & mask) == mask); default: +#ifdef _KERNEL panic("ffs_isblock"); +#endif + break; } return (0); } /* + * check if a block is free + */ +int +ffs_isfreeblock(fs, cp, h) + struct fs *fs; + u_char *cp; + ufs1_daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0); + case 4: + return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); + case 2: + return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); + case 1: + return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); + default: +#ifdef _KERNEL + panic("ffs_isfreeblock"); +#endif + break; + } + return (0); +} + +/* * take a block out of the map */ void @@ -252,7 +282,10 @@ ffs_clrblock(fs, cp, h) cp[h >> 3] &= ~(0x01 << (h & 0x7)); return; default: +#ifdef _KERNEL panic("ffs_clrblock"); +#endif + break; } } @@ -281,6 +314,101 @@ ffs_setblock(fs, cp, h) cp[h >> 3] |= (0x01 << (h & 0x7)); return; default: +#ifdef _KERNEL panic("ffs_setblock"); +#endif + break; } } + +/* + * Update the cluster map because of an allocation or free. + * + * Cnt == 1 means free; cnt == -1 means allocating. + */ +void +ffs_clusteracct(fs, cgp, blkno, cnt) + struct fs *fs; + struct cg *cgp; + ufs1_daddr_t blkno; + int cnt; +{ + int32_t *sump; + int32_t *lp; + u_char *freemapp, *mapp; + int i, start, end, forw, back, map, bit; + + if (fs->fs_contigsumsize <= 0) + return; + freemapp = cg_clustersfree(cgp); + sump = cg_clustersum(cgp); + /* + * Allocate or clear the actual block. + */ + if (cnt > 0) + setbit(freemapp, blkno); + else + clrbit(freemapp, blkno); + /* + * Find the size of the cluster going forward. + */ + start = blkno + 1; + end = start + fs->fs_contigsumsize; + if (end >= cgp->cg_nclusterblks) + end = cgp->cg_nclusterblks; + mapp = &freemapp[start / NBBY]; + map = *mapp++; + bit = 1 << (start % NBBY); + for (i = start; i < end; i++) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != (NBBY - 1)) { + bit <<= 1; + } else { + map = *mapp++; + bit = 1; + } + } + forw = i - start; + /* + * Find the size of the cluster going backward. 
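
The fragment-mask arithmetic in the relocated ffs_isfreeblock() above packs fs_frag fragments per block into 8/4/2/1 bits of each map byte; for fs_frag == 4 a block h owns the nibble selected by (h & 0x1) << 2. A tiny standalone check of just that indexing arithmetic (it asserts the predicate's value, not any claim about map polarity):

#include <assert.h>

int
main(void)
{
        unsigned char cp[1] = { 0xf0 };

        /* h = 0: low nibble is clear, predicate holds */
        assert((cp[0] & (0x0f << ((0 & 0x1) << 2))) == 0);
        /* h = 1: high nibble is 0xf0, predicate fails */
        assert((cp[0] & (0x0f << ((1 & 0x1) << 2))) != 0);
        return (0);
}
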
+ */ + start = blkno - 1; + end = start - fs->fs_contigsumsize; + if (end < 0) + end = -1; + mapp = &freemapp[start / NBBY]; + map = *mapp--; + bit = 1 << (start % NBBY); + for (i = start; i > end; i--) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != 0) { + bit >>= 1; + } else { + map = *mapp--; + bit = 1 << (NBBY - 1); + } + } + back = start - i; + /* + * Account for old cluster and the possibly new forward and + * back clusters. + */ + i = back + forw + 1; + if (i > fs->fs_contigsumsize) + i = fs->fs_contigsumsize; + sump[i] += cnt; + if (back > 0) + sump[back] -= cnt; + if (forw > 0) + sump[forw] -= cnt; + /* + * Update cluster summary information. + */ + lp = &sump[fs->fs_contigsumsize]; + for (i = fs->fs_contigsumsize; i > 0; i--) + if (*lp-- > 0) + break; + fs->fs_maxcluster[cgp->cg_cgx] = i; +} Index: /usr/src/sys/ufs/ffs/softdep.h =================================================================== --- /usr/src/sys/ufs/ffs/softdep.h (revision 202614) +++ /usr/src/sys/ufs/ffs/softdep.h (working copy) @@ -94,22 +94,28 @@ * The ONWORKLIST flag shows whether the structure is currently linked * onto a worklist. */ -#define ATTACHED 0x0001 -#define UNDONE 0x0002 -#define COMPLETE 0x0004 -#define DEPCOMPLETE 0x0008 -#define MKDIR_PARENT 0x0010 /* diradd & mkdir only */ -#define MKDIR_BODY 0x0020 /* diradd & mkdir only */ -#define RMDIR 0x0040 /* dirrem only */ -#define DIRCHG 0x0080 /* diradd & dirrem only */ -#define GOINGAWAY 0x0100 /* indirdep only */ -#define IOSTARTED 0x0200 /* inodedep & pagedep only */ -#define SPACECOUNTED 0x0400 /* inodedep only */ -#define NEWBLOCK 0x0800 /* pagedep only */ -#define INPROGRESS 0x1000 /* dirrem, freeblks, freefrag, freefile only */ -#define UFS1FMT 0x2000 /* indirdep only */ -#define EXTDATA 0x4000 /* allocdirect only */ -#define ONWORKLIST 0x8000 +#define ATTACHED 0x000001 +#define UNDONE 0x000002 +#define COMPLETE 0x000004 +#define DEPCOMPLETE 0x000008 +#define MKDIR_PARENT 0x000010 /* diradd, mkdir, jaddref, jsegdep only */ +#define MKDIR_BODY 0x000020 /* diradd, mkdir, jaddref only */ +#define RMDIR 0x000040 /* dirrem only */ +#define DIRCHG 0x000080 /* diradd, dirrem only */ +#define GOINGAWAY 0x000100 /* indirdep, jremref only */ +#define IOSTARTED 0x000200 /* inodedep, pagedep, bmsafemap only */ +#define SPACECOUNTED 0x000400 /* inodedep only */ +#define NEWBLOCK 0x000800 /* pagedep, jaddref only */ +#define INPROGRESS 0x001000 /* dirrem, freeblks, freefrag, freefile only */ +#define UFS1FMT 0x002000 /* indirdep only */ +#define EXTDATA 0x004000 /* allocdirect only */ +#define ONWORKLIST 0x008000 +#define IOWAITING 0x010000 /* Thread is waiting for IO to complete. */ +#define ONDEPLIST 0x020000 /* Structure is on a dependency list. */ +#define UNLINKED 0x040000 /* inodedep has been unlinked. */ +#define UNLINKNEXT 0x080000 /* inodedep has valid di_freelink */ +#define UNLINKPREV 0x100000 /* inodedep is pointed at in the unlink list */ +#define UNLINKLINKS (UNLINKNEXT | UNLINKPREV) #define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE) @@ -135,25 +141,37 @@ * and the macros below changed to use it. 
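
ffs_clusteracct(), moved above into ffs_subr.c with its panics made kernel-only (presumably so userland tooling can compile the file), maintains a histogram sump[n] of free runs of length n: freeing a block merges its neighbours' runs into one longer run, allocating splits a run in two. The same bookkeeping over a simplified byte-per-block map; the real code walks the bitmap NBBY bits at a time and clamps scans to fs_contigsumsize:

static void
clusteracct_toy(char *freemap, int nblocks, int *sump, int maxrun,
    int blkno, int cnt /* 1 = free, -1 = allocate */)
{
        int i, forw, back;

        freemap[blkno] = (cnt > 0);
        for (i = blkno + 1, forw = 0; i < nblocks && freemap[i]; i++)
                forw++;                 /* run continuing forward */
        for (i = blkno - 1, back = 0; i >= 0 && freemap[i]; i--)
                back++;                 /* run continuing backward */
        i = back + forw + 1;
        if (i > maxrun)
                i = maxrun;
        sump[i] += cnt;                 /* run including this block */
        if (back > 0)
                sump[back] -= cnt;      /* former separate runs */
        if (forw > 0)
                sump[forw] -= cnt;
}
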
*/ struct worklist { + LIST_ENTRY(worklist) wk_list; /* list of work requests */ struct mount *wk_mp; /* Mount we live in */ - LIST_ENTRY(worklist) wk_list; /* list of work requests */ - unsigned short wk_type; /* type of request */ - unsigned short wk_state; /* state flags */ + unsigned int wk_type:8, /* type of request */ + wk_state:24; /* state flags */ }; #define WK_DATA(wk) ((void *)(wk)) #define WK_PAGEDEP(wk) ((struct pagedep *)(wk)) #define WK_INODEDEP(wk) ((struct inodedep *)(wk)) #define WK_BMSAFEMAP(wk) ((struct bmsafemap *)(wk)) +#define WK_NEWBLK(wk) ((struct newblk *)(wk)) #define WK_ALLOCDIRECT(wk) ((struct allocdirect *)(wk)) #define WK_INDIRDEP(wk) ((struct indirdep *)(wk)) #define WK_ALLOCINDIR(wk) ((struct allocindir *)(wk)) #define WK_FREEFRAG(wk) ((struct freefrag *)(wk)) #define WK_FREEBLKS(wk) ((struct freeblks *)(wk)) +#define WK_FREEWORK(wk) ((struct freework *)(wk)) #define WK_FREEFILE(wk) ((struct freefile *)(wk)) #define WK_DIRADD(wk) ((struct diradd *)(wk)) #define WK_MKDIR(wk) ((struct mkdir *)(wk)) #define WK_DIRREM(wk) ((struct dirrem *)(wk)) #define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk)) +#define WK_JADDREF(wk) ((struct jaddref *)(wk)) +#define WK_JREMREF(wk) ((struct jremref *)(wk)) +#define WK_JMVREF(wk) ((struct jmvref *)(wk)) +#define WK_JSEGDEP(wk) ((struct jsegdep *)(wk)) +#define WK_JSEG(wk) ((struct jseg *)(wk)) +#define WK_JNEWBLK(wk) ((struct jnewblk *)(wk)) +#define WK_JFREEBLK(wk) ((struct jfreeblk *)(wk)) +#define WK_FREEDEP(wk) ((struct freedep *)(wk)) +#define WK_JFREEFRAG(wk) ((struct jfreefrag *)(wk)) +#define WK_SBDEP(wk) ((struct sbdep *)wk) /* * Various types of lists @@ -165,6 +183,15 @@ LIST_HEAD(inodedephd, inodedep); LIST_HEAD(allocindirhd, allocindir); LIST_HEAD(allocdirecthd, allocdirect); TAILQ_HEAD(allocdirectlst, allocdirect); +LIST_HEAD(indirdephd, indirdep); +LIST_HEAD(jaddrefhd, jaddref); +LIST_HEAD(jremrefhd, jremref); +LIST_HEAD(jmvrefhd, jmvref); +LIST_HEAD(jnewblkhd, jnewblk); +LIST_HEAD(jfreeblkhd, jfreeblk); +LIST_HEAD(freeworkhd, freework); +TAILQ_HEAD(jseglst, jseg); +TAILQ_HEAD(inoreflst, inoref); /* * The "pagedep" structure tracks the various dependencies related to @@ -192,9 +219,11 @@ struct pagedep { LIST_ENTRY(pagedep) pd_hash; /* hashed lookup */ ino_t pd_ino; /* associated file */ ufs_lbn_t pd_lbn; /* block within file */ + struct newdirblk *pd_newdirblk; /* associated newdirblk if NEWBLOCK */ struct dirremhd pd_dirremhd; /* dirrem's waiting for page */ struct diraddhd pd_diraddhd[DAHASHSZ]; /* diradd dir entry updates */ struct diraddhd pd_pendinghd; /* directory entries awaiting write */ + struct jmvrefhd pd_jmvrefhd; /* Dependent journal writes. */ }; /* @@ -248,13 +277,18 @@ struct inodedep { struct worklist id_list; /* buffer holding inode block */ # define id_state id_list.wk_state /* inode dependency state */ LIST_ENTRY(inodedep) id_hash; /* hashed lookup */ + TAILQ_ENTRY(inodedep) id_unlinked; /* Unlinked but ref'd inodes */ struct fs *id_fs; /* associated filesystem */ ino_t id_ino; /* dependent inode */ nlink_t id_nlinkdelta; /* saved effective link count */ + nlink_t id_savednlink; /* Link saved during rollback */ LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */ - struct buf *id_buf; /* related bmsafemap (if pending) */ + struct bmsafemap *id_bmsafemap; /* related bmsafemap (if pending) */ + struct diradd *id_mkdiradd; /* diradd for a mkdir. */ + struct inoreflst id_inoreflst; /* Inode reference adjustments. 
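
The struct worklist above is this code's poor-man's base class: every dependency structure places a worklist first, wk_type says which concrete type follows, and the WK_*() macros are bare casts. The new 8/24 bitfield split also makes room for state flags past 0x8000; UNLINKPREV above is 0x100000, which the old unsigned short wk_state could not hold. A minimal model of why the cast is safe, under toy names:

#include <assert.h>
#include <stdlib.h>

struct worklist_toy { int wk_type; };   /* must be the first member */
struct inodedep_toy { struct worklist_toy id_list; int id_ino; };

#define D_INODEDEP_TOY  1
#define WK_INODEDEP_TOY(wk) ((struct inodedep_toy *)(wk))

int
main(void)
{
        struct inodedep_toy *id = calloc(1, sizeof(*id));
        struct worklist_toy *wk = &id->id_list;

        wk->wk_type = D_INODEDEP_TOY;
        id->id_ino = 7;
        /* check the type tag, then downcast: same address as first member */
        assert(wk->wk_type == D_INODEDEP_TOY);
        assert(WK_INODEDEP_TOY(wk)->id_ino == 7);
        free(id);
        return (0);
}
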
*/ long id_savedextsize; /* ext size saved during rollback */ off_t id_savedsize; /* file size saved during rollback */ + struct dirremhd id_dirremhd; /* Removals pending. */ struct workhead id_pendinghd; /* entries awaiting directory write */ struct workhead id_bufwait; /* operations after inode written */ struct workhead id_inowait; /* operations waiting inode update */ @@ -271,23 +305,6 @@ struct inodedep { #define id_savedino2 id_un.idu_savedino2 /* - * A "newblk" structure is attached to a bmsafemap structure when a block - * or fragment is allocated from a cylinder group. Its state is set to - * DEPCOMPLETE when its cylinder group map is written. It is consumed by - * an associated allocdirect or allocindir allocation which will attach - * themselves to the bmsafemap structure if the newblk's DEPCOMPLETE flag - * is not set (i.e., its cylinder group map has not been written). - */ -struct newblk { - LIST_ENTRY(newblk) nb_hash; /* hashed lookup */ - struct fs *nb_fs; /* associated filesystem */ - int nb_state; /* state of bitmap dependency */ - ufs2_daddr_t nb_newblkno; /* allocated block number */ - LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblk's */ - struct bmsafemap *nb_bmsafemap; /* associated bmsafemap */ -}; - -/* * A "bmsafemap" structure maintains a list of dependency structures * that depend on the update of a particular cylinder group map. * It has lists for newblks, allocdirects, allocindirs, and inodedeps. @@ -299,14 +316,44 @@ struct inodedep { */ struct bmsafemap { struct worklist sm_list; /* cylgrp buffer */ +# define sm_state sm_list.wk_state + int sm_cg; + LIST_ENTRY(bmsafemap) sm_hash; /* Hash links. */ struct buf *sm_buf; /* associated buffer */ struct allocdirecthd sm_allocdirecthd; /* allocdirect deps */ + struct allocdirecthd sm_allocdirectwr; /* writing allocdirect deps */ struct allocindirhd sm_allocindirhd; /* allocindir deps */ + struct allocindirhd sm_allocindirwr; /* writing allocindir deps */ struct inodedephd sm_inodedephd; /* inodedep deps */ + struct inodedephd sm_inodedepwr; /* writing inodedep deps */ struct newblkhd sm_newblkhd; /* newblk deps */ + struct newblkhd sm_newblkwr; /* writing newblk deps */ + struct jaddrefhd sm_jaddrefhd; /* Pending inode allocations. */ + struct jnewblkhd sm_jnewblkhd; /* Pending block allocations. */ }; /* + * A "newblk" structure is attached to a bmsafemap structure when a block + * or fragment is allocated from a cylinder group. Its state is set to + * DEPCOMPLETE when its cylinder group map is written. It is converted to + * an allocdirect or allocindir allocation once the allocator calls the + * appropriate setup function. + */ +struct newblk { + struct worklist nb_list; +# define nb_state nb_list.wk_state + LIST_ENTRY(newblk) nb_hash; /* hashed lookup */ + LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblks */ + struct jnewblk *nb_jnewblk; /* New block journal entry. */ + struct bmsafemap *nb_bmsafemap;/* cylgrp dep (if pending) */ + struct freefrag *nb_freefrag; /* fragment to be freed (if any) */ + struct indirdephd nb_indirdeps; /* Children indirect blocks. */ + struct workhead nb_newdirblk; /* dir block to notify when written */ + struct workhead nb_jwork; /* Journal work pending. */ + ufs2_daddr_t nb_newblkno; /* new value of block pointer */ +}; + +/* * An "allocdirect" structure is attached to an "inodedep" when a new block * or fragment is allocated and pointed to by the inode described by * "inodedep". The worklist is linked to the buffer that holds the block. 
@@ -334,20 +381,18 @@ struct bmsafemap { * and inodedep->id_pendinghd lists. */ struct allocdirect { - struct worklist ad_list; /* buffer holding block */ -# define ad_state ad_list.wk_state /* block pointer state */ + struct newblk ad_block; /* Common block logic */ +# define ad_state ad_block.nb_list.wk_state /* block pointer state */ TAILQ_ENTRY(allocdirect) ad_next; /* inodedep's list of allocdirect's */ - ufs_lbn_t ad_lbn; /* block within file */ - ufs2_daddr_t ad_newblkno; /* new value of block pointer */ - ufs2_daddr_t ad_oldblkno; /* old value of block pointer */ - long ad_newsize; /* size of new block */ - long ad_oldsize; /* size of old block */ - LIST_ENTRY(allocdirect) ad_deps; /* bmsafemap's list of allocdirect's */ - struct buf *ad_buf; /* cylgrp buffer (if pending) */ struct inodedep *ad_inodedep; /* associated inodedep */ - struct freefrag *ad_freefrag; /* fragment to be freed (if any) */ - struct workhead ad_newdirblk; /* dir block to notify when written */ + ufs2_daddr_t ad_oldblkno; /* old value of block pointer */ + int ad_offset; /* Pointer offset in parent. */ + long ad_newsize; /* size of new block */ + long ad_oldsize; /* size of old block */ }; +#define ad_newblkno ad_block.nb_newblkno +#define ad_freefrag ad_block.nb_freefrag +#define ad_newdirblk ad_block.nb_newdirblk /* * A single "indirdep" structure manages all allocation dependencies for @@ -369,10 +414,14 @@ struct allocdirect { struct indirdep { struct worklist ir_list; /* buffer holding indirect block */ # define ir_state ir_list.wk_state /* indirect block pointer state */ - caddr_t ir_saveddata; /* buffer cache contents */ + LIST_ENTRY(indirdep) ir_next; /* alloc{direct,indir} list */ + caddr_t ir_saveddata; /* buffer cache contents */ struct buf *ir_savebp; /* buffer holding safe copy */ + struct allocindirhd ir_completehd; /* waiting for indirdep complete */ + struct allocindirhd ir_writehd; /* Waiting for the pointer write. */ struct allocindirhd ir_donehd; /* done waiting to update safecopy */ struct allocindirhd ir_deplisthd; /* allocindir deps for this block */ + struct workhead ir_jwork; /* Journal work pending. */ }; /* @@ -389,31 +438,39 @@ struct indirdep { * can then be freed as it is no longer applicable. */ struct allocindir { - struct worklist ai_list; /* buffer holding indirect block */ -# define ai_state ai_list.wk_state /* indirect block pointer state */ + struct newblk ai_block; /* Common block area */ +# define ai_state ai_block.nb_list.wk_state /* indirect pointer state */ LIST_ENTRY(allocindir) ai_next; /* indirdep's list of allocindir's */ - int ai_offset; /* pointer offset in indirect block */ - ufs2_daddr_t ai_newblkno; /* new block pointer value */ - ufs2_daddr_t ai_oldblkno; /* old block pointer value */ - struct freefrag *ai_freefrag; /* block to be freed when complete */ struct indirdep *ai_indirdep; /* address of associated indirdep */ - LIST_ENTRY(allocindir) ai_deps; /* bmsafemap's list of allocindir's */ - struct buf *ai_buf; /* cylgrp buffer (if pending) */ + ufs2_daddr_t ai_oldblkno; /* old value of block pointer */ + int ai_offset; /* Pointer offset in parent. */ }; +#define ai_newblkno ai_block.nb_newblkno +#define ai_freefrag ai_block.nb_freefrag +#define ai_newdirblk ai_block.nb_newdirblk /* + * The allblk union is used to size the newblk structure on allocation so + * that it may be any one of three types. 
+ */ +union allblk { + struct allocindir ab_allocindir; + struct allocdirect ab_allocdirect; + struct newblk ab_newblk; +}; + +/* * A "freefrag" structure is attached to an "inodedep" when a previously * allocated fragment is replaced with a larger fragment, rather than extended. * The "freefrag" structure is constructed and attached when the replacement * block is first allocated. It is processed after the inode claiming the - * bigger block that replaces it has been written to disk. Note that the - * ff_state field is is used to store the uid, so may lose data. However, - * the uid is used only in printing an error message, so is not critical. - * Keeping it in a short keeps the data structure down to 32 bytes. + * bigger block that replaces it has been written to disk. */ struct freefrag { struct worklist ff_list; /* id_inowait or delayed worklist */ -# define ff_state ff_list.wk_state /* owning user; should be uid_t */ +# define ff_state ff_list.wk_state + struct jfreefrag *ff_jfreefrag; /* Associated journal entry. */ + struct workhead ff_jwork; /* Journal work pending. */ ufs2_daddr_t ff_blkno; /* fragment physical block number */ long ff_fragsize; /* size of fragment being deleted */ ino_t ff_inum; /* owning inode number */ @@ -423,23 +480,60 @@ struct freefrag { * A "freeblks" structure is attached to an "inodedep" when the * corresponding file's length is reduced to zero. It records all * the information needed to free the blocks of a file after its - * zero'ed inode has been written to disk. + * zero'ed inode has been written to disk. The actual work is done + * by child freework structures which are responsible for individual + * inode pointers while freeblks is responsible for retiring the + * entire operation when it is complete and holding common members. */ struct freeblks { struct worklist fb_list; /* id_inowait or delayed worklist */ # define fb_state fb_list.wk_state /* inode and dirty block state */ + struct jfreeblkhd fb_jfreeblkhd; /* Journal entries pending */ + struct workhead fb_freeworkhd; /* Work items pending */ + struct workhead fb_jwork; /* Journal work pending */ ino_t fb_previousinum; /* inode of previous owner of blocks */ uid_t fb_uid; /* uid of previous owner of blocks */ struct vnode *fb_devvp; /* filesystem device vnode */ - long fb_oldextsize; /* previous ext data size */ - off_t fb_oldsize; /* previous file size */ ufs2_daddr_t fb_chkcnt; /* used to check cnt of blks released */ - ufs2_daddr_t fb_dblks[NDADDR]; /* direct blk ptrs to deallocate */ - ufs2_daddr_t fb_iblks[NIADDR]; /* indirect blk ptrs to deallocate */ - ufs2_daddr_t fb_eblks[NXADDR]; /* indirect blk ptrs to deallocate */ + int fb_ref; /* Children outstanding. */ }; /* + * A "freework" structure handles the release of a tree of blocks or a single + * block. Each indirect block in a tree is allocated its own freework + * structure so that the indrect block may be freed only when all of its + * children are freed. In this way we enforce the rule that an allocated + * block must have a valid path to a root that is journaled. Each child + * block acquires a reference and when the ref hits zero the parent ref + * is decremented. If there is no parent the freeblks ref is decremented. + */ +struct freework { + struct worklist fw_list; +# define fw_state fw_list.wk_state + LIST_ENTRY(freework) fw_next; /* Queue for freeblksk. */ + struct freeblks *fw_freeblks; /* Root of operation. */ + struct freework *fw_parent; /* Parent indirect. */ + ufs2_daddr_t fw_blkno; /* Our block #. 
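
union allblk above exists because a block dependency is born as a generic newblk and is only later promoted in place to an allocdirect or allocindir by the setup functions; sizing the allocation by the union makes that promotion safe. The idea with toy types (calloc's NULL return is ignored for brevity):

#include <stdlib.h>

struct newblk_toy { int nb_state; long nb_newblkno; };
struct allocdirect_toy { struct newblk_toy ad_block; int ad_offset; };
struct allocindir_toy { struct newblk_toy ai_block; int ai_offset; };

union allblk_toy {
        struct newblk_toy ab_newblk;
        struct allocdirect_toy ab_allocdirect;
        struct allocindir_toy ab_allocindir;
};

static struct newblk_toy *
newblk_alloc(void)
{
        union allblk_toy *ab;

        ab = calloc(1, sizeof(*ab));    /* big enough for any later identity */
        return (&ab->ab_newblk);        /* promotion later is just a cast */
}
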
*/ + ufs_lbn_t fw_lbn; /* Original lbn before free. */ + int fw_frags; /* Number of frags. */ + int fw_ref; /* Number of children out. */ + int fw_off; /* Current working position. */ + struct workhead fw_jwork; /* Journal work pending. */ +}; + +/* + * A "freedep" structure is allocated to track the completion of a bitmap + * write for a freework. One freedep may cover many freed blocks so long + * as they reside in the same cylinder group. When the cg is written + * the freedep decrements the ref on the freework which may permit it + * to be freed as well. + */ +struct freedep { + struct worklist fd_list; + struct freework *fd_freework; /* Parent freework. */ +}; + +/* * A "freefile" structure is attached to an inode when its * link count is reduced to zero. It marks the inode as free in * the cylinder group map after the zero'ed inode has been written @@ -450,6 +544,7 @@ struct freefile { mode_t fx_mode; /* mode of inode */ ino_t fx_oldinum; /* inum of the unlinked file */ struct vnode *fx_devvp; /* filesystem device vnode */ + struct workhead fx_jwork; /* journal work pending. */ }; /* @@ -482,12 +577,11 @@ struct freefile { * than zero. * * The overlaying of da_pagedep and da_previous is done to keep the - * structure down to 32 bytes in size on a 32-bit machine. If a - * da_previous entry is present, the pointer to its pagedep is available - * in the associated dirrem entry. If the DIRCHG flag is set, the - * da_previous entry is valid; if not set the da_pagedep entry is valid. - * The DIRCHG flag never changes; it is set when the structure is created - * if appropriate and is never cleared. + * structure down. If a da_previous entry is present, the pointer to its + * pagedep is available in the associated dirrem entry. If the DIRCHG flag + * is set, the da_previous entry is valid; if not set the da_pagedep entry + * is valid. The DIRCHG flag never changes; it is set when the structure + * is created if appropriate and is never cleared. */ struct diradd { struct worklist da_list; /* id_inowait or id_pendinghd list */ @@ -499,6 +593,7 @@ struct diradd { struct dirrem *dau_previous; /* entry being replaced in dir change */ struct pagedep *dau_pagedep; /* pagedep dependency for addition */ } da_un; + struct workhead da_jwork; /* Journal work awaiting completion. */ }; #define da_previous da_un.dau_previous #define da_pagedep da_un.dau_pagedep @@ -525,12 +620,13 @@ struct diradd { * mkdir structures that reference it. The deletion would be faster if the * diradd structure were simply augmented to have two pointers that referenced * the associated mkdir's. However, this would increase the size of the diradd - * structure from 32 to 64-bits to speed a very infrequent operation. + * structure to speed a very infrequent operation. */ struct mkdir { struct worklist md_list; /* id_inowait or buffer holding dir */ # define md_state md_list.wk_state /* type: MKDIR_PARENT or MKDIR_BODY */ struct diradd *md_diradd; /* associated diradd */ + struct jaddref *md_jaddref; /* dependent jaddref. */ struct buf *md_buf; /* MKDIR_BODY: buffer holding dir */ LIST_ENTRY(mkdir) md_mkdirs; /* list of all mkdirs */ }; @@ -542,20 +638,19 @@ LIST_HEAD(mkdirlist, mkdir) mkdirlisthd; * list of the pagedep for the directory page that contains the entry. * It is processed after the directory page with the deleted entry has * been written to disk. - * - * The overlaying of dm_pagedep and dm_dirinum is done to keep the - * structure down to 32 bytes in size on a 32-bit machine. 
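
The freework/freedep comments above describe a reference-counted teardown: each child of an indirect block holds a reference, releasing the last child releases the parent, and so on up to the freeblks root. Reduced to a runnable toy (struct fw is an invented stand-in):

#include <stdio.h>
#include <stdlib.h>

struct fw {
        struct fw *parent;
        int ref;                /* children still outstanding */
};

static void
fw_release(struct fw *f)
{
        while (f != NULL && --f->ref == 0) {
                struct fw *p = f->parent;

                printf("node %p done\n", (void *)f);
                free(f);
                f = p;          /* cascade upward */
        }
}

int
main(void)
{
        struct fw *root = calloc(1, sizeof(*root));
        struct fw *c1 = calloc(1, sizeof(*c1)), *c2 = calloc(1, sizeof(*c2));

        root->ref = 2;
        c1->parent = c2->parent = root;
        c1->ref = c2->ref = 1;
        fw_release(c1);         /* root still has one child out */
        fw_release(c2);         /* last child: root completes too */
        return (0);
}
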
It works - * because they are never used concurrently. */ struct dirrem { struct worklist dm_list; /* delayed worklist */ # define dm_state dm_list.wk_state /* state of the old directory entry */ LIST_ENTRY(dirrem) dm_next; /* pagedep's list of dirrem's */ + LIST_ENTRY(dirrem) dm_inonext; /* inodedep's list of dirrem's */ + struct jremrefhd dm_jremrefhd; /* Pending remove reference deps. */ ino_t dm_oldinum; /* inum of the removed dir entry */ union { struct pagedep *dmu_pagedep; /* pagedep dependency for remove */ ino_t dmu_dirinum; /* parent inode number (for rmdir) */ } dm_un; + struct workhead dm_jwork; /* Journal work awaiting completion. */ }; #define dm_pagedep dm_un.dmu_pagedep #define dm_dirinum dm_un.dmu_dirinum @@ -577,9 +672,186 @@ struct dirrem { * blocks using a similar scheme with the allocindir structures. Rather * than adding this level of complexity, we simply write those newly * allocated indirect blocks synchronously as such allocations are rare. + * In the case of a new directory the . and .. links are tracked with + * a mkdir rather than a pagedep. In this case we track the mkdir + * so it can be released when it is written. A workhead is used + * to simplify canceling a mkdir that is removed by a subsequent dirrem. */ struct newdirblk { struct worklist db_list; /* id_inowait or pg_newdirblk */ # define db_state db_list.wk_state /* unused */ struct pagedep *db_pagedep; /* associated pagedep */ + struct workhead db_mkdir; }; + +/* + * The inoref structure holds the elements common to jaddref and jremref + * so they may easily be queued in-order on the inodedep. + */ +struct inoref { + struct worklist if_list; +# define if_state if_list.wk_state + TAILQ_ENTRY(inoref) if_deps; /* Links for inodedep. */ + struct jsegdep *if_jsegdep; + off_t if_diroff; /* Directory offset. */ + ino_t if_ino; /* Inode number. */ + ino_t if_parent; /* Parent inode number. */ + nlink_t if_nlink; /* nlink before addition. */ + uint16_t if_mode; /* File mode, needed for IFMT. */ +}; + +/* + * A "jaddref" structure tracks a new reference (link count) on an inode + * and prevents the link count increase and bitmap allocation until a + * journal entry can be written. Once the journal entry is written, + * the inode is put on the pendinghd of the bmsafemap and a diradd or + * mkdir entry is placed on the bufwait list of the inode. The DEPCOMPLETE + * flag is used to indicate that all of the required information for writing + * the journal entry is present. MKDIR_BODY and MKDIR_PARENT are used to + * differentiate . and .. links from regular file names. NEWBLOCK indicates + * a bitmap is still pending. If a new reference is canceled by a delete + * prior to writing the journal the jaddref write is canceled and the + * structure persists to prevent any disk-visible changes until it is + * ultimately released when the file is freed or the link is dropped again. + */ +struct jaddref { + struct inoref ja_ref; +# define ja_list ja_ref.if_list /* Journal pending or jseg entries. */ +# define ja_state ja_ref.if_list.wk_state + LIST_ENTRY(jaddref) ja_bmdeps; /* Links for bmsafemap. */ + union { + struct diradd *jau_diradd; /* Pending diradd. 
*/ + struct mkdir *jau_mkdir; /* MKDIR_{PARENT,BODY} */ + } ja_un; +}; +#define ja_diradd ja_un.jau_diradd +#define ja_mkdir ja_un.jau_mkdir +#define ja_diroff ja_ref.if_diroff +#define ja_ino ja_ref.if_ino +#define ja_parent ja_ref.if_parent +#define ja_mode ja_ref.if_mode + +/* + * A "jremref" structure tracks a removed reference (unlink) on an + * inode and prevents the directory remove from proceeding until the + * journal entry is written. Once the journal has been written the remove + * may proceed as normal. + */ +struct jremref { + struct inoref jr_ref; +# define jr_list jr_ref.if_list /* Journal pending or jseg entries. */ +# define jr_state jr_ref.if_list.wk_state + LIST_ENTRY(jremref) jr_deps; /* Links for pagdep. */ + struct dirrem *jr_dirrem; /* Back pointer to dirrem. */ +}; + +struct jmvref { + struct worklist jm_list; + LIST_ENTRY(jmvref) jm_deps; + struct pagedep *jm_pagedep; + ino_t jm_parent; + ino_t jm_ino; + off_t jm_oldoff; + off_t jm_newoff; +}; + +/* + * A "jnewblk" structure tracks a newly allocated block or fragment and + * prevents the direct or indirect block pointer as well as the cg bitmap + * from being written until it is logged. After it is logged the jsegdep + * is attached to the allocdirect or allocindir until the operation is + * completed or reverted. If the operation is reverted prior to the journal + * write the jnewblk structure is maintained to prevent the bitmaps from + * reaching the disk. Ultimately the jnewblk structure will be passed + * to the free routine as the in memory cg is modified back to the free + * state at which time it can be released. + */ +struct jnewblk { + struct worklist jn_list; +# define jn_state jn_list.wk_state + struct jsegdep *jn_jsegdep; + LIST_ENTRY(jnewblk) jn_deps; /* All jnewblks on bmsafemap */ + struct newblk *jn_newblk; + ino_t jn_ino; + ufs_lbn_t jn_lbn; + ufs2_daddr_t jn_blkno; + int jn_oldfrags; + int jn_frags; +}; + +/* + * A "jfreeblk" structure tracks the journal write for freeing a block + * or tree of blocks. The block pointer must not be cleared in the inode + * or indirect prior to the jfreeblk being written. + */ +struct jfreeblk { + struct worklist jf_list; +# define jf_state jf_list.wk_state + struct jsegdep *jf_jsegdep; + struct freeblks *jf_freeblks; + LIST_ENTRY(jfreeblk) jf_deps; + ino_t jf_ino; + ufs_lbn_t jf_lbn; + ufs2_daddr_t jf_blkno; + int jf_frags; +}; + +/* + * A "jfreefrag" tracks the freeing of a single block when a fragment is + * extended or an indirect page is replaced. It is not part of a larger + * freeblks operation. + */ +struct jfreefrag { + struct worklist fr_list; +# define fr_state fr_list.wk_state + struct jsegdep *fr_jsegdep; + struct freefrag *fr_freefrag; + ino_t fr_ino; + ufs_lbn_t fr_lbn; + ufs2_daddr_t fr_blkno; + int fr_frags; +}; + +/* + * A "jsegdep" structure tracks a single reference to a written journal + * segment so the journal space can be reclaimed when all dependencies + * have been written. + */ +struct jsegdep { + struct worklist jd_list; +# define jd_state jd_list.wk_state + struct jseg *jd_seg; +}; + +/* + * A "jseg" structure contains all of the journal records written in a + * single disk write. jaddref and jremref structures are linked into + * js_entries so thay may be completed when the write completes. The + * js_deps array contains as many entries as there are ref counts to + * reduce the number of allocations required per journal write to one. 
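
The jseg described above (its definition opens the next hunk) batches every record of one journal disk write into a single segment; js_refs counts the records not yet retired, and the journal space becomes recyclable only when it drains to zero. That accounting in miniature, hypothetical names throughout:

struct jseg_toy {
        unsigned long js_seq;   /* ordering of segments on disk */
        int js_refs;            /* records not yet retired */
};

static int
jseg_rele(struct jseg_toy *seg)
{
        /* called as each dependent operation completes */
        if (--seg->js_refs == 0)
                return (1);     /* caller may reclaim the journal space */
        return (0);
}
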
+
+/*
+ * A "jseg" structure contains all of the journal records written in a
+ * single disk write.  jaddref and jremref structures are linked into
+ * js_entries so they may be completed when the write completes.  The
+ * js_deps array contains as many entries as there are ref counts to
+ * reduce the number of allocations required per journal write to one.
+ */
+struct jseg {
+	struct	worklist js_list;	/* b_deps link for journal */
+#	define	js_state js_list.wk_state
+	struct	workhead js_entries;	/* Entries awaiting write */
+	TAILQ_ENTRY(jseg) js_next;
+	struct	jblocks *js_jblocks;	/* Back pointer to block/seg list */
+	struct	buf *js_buf;		/* Buffer while unwritten */
+	uint64_t js_seq;
+	int	js_size;		/* Allocated size in bytes */
+	int	js_cnt;			/* Total items allocated */
+	int	js_refs;		/* Count of items pending completion */
+};
+
+/*
+ * A 'sbdep' structure tracks the head of the free inode list and
+ * superblock writes.  This makes sure the superblock is always pointing at
+ * the first possible unlinked inode for the suj recovery process.  If a
+ * block write completes and we discover a new head is available the buf
+ * is dirtied and the dep is kept.
+ */
+struct sbdep {
+	struct	worklist sb_list;	/* b_dep linkage */
+	struct	fs *sb_fs;		/* Filesystem pointer within buf. */
+	struct	ufsmount *sb_ump;
+};
Index: /usr/src/sys/ufs/ffs/ffs_balloc.c
===================================================================
--- /usr/src/sys/ufs/ffs/ffs_balloc.c	(revision 202614)
+++ /usr/src/sys/ufs/ffs/ffs_balloc.c	(working copy)
@@ -120,6 +120,8 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse
 	if (lbn < 0)
 		return (EFBIG);
+	if (DOINGSOFTDEP(vp))
+		softdep_prealloc(vp, MNT_WAIT);
 	/*
 	 * If the next write will extend the file into a new block,
 	 * and the file is currently composed of a fragment
@@ -418,6 +420,8 @@ fail:
 	 * slow, running out of disk space is not expected to be a common
 	 * occurence. The error return from fsync is ignored as we already
 	 * have an error to return to the user.
+	 *
+	 * XXX Still have to journal the free below
 	 */
 	(void) ffs_syncvnode(vp, MNT_WAIT);
 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
@@ -473,7 +477,7 @@ fail:
 	 */
 	for (blkp = allociblk; blkp < allocblk; blkp++) {
 		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
-		    ip->i_number);
+		    ip->i_number, NULL);
 	}
 	return (error);
 }
@@ -515,6 +519,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse
 	if (lbn < 0)
 		return (EFBIG);
+	if (DOINGSOFTDEP(vp))
+		softdep_prealloc(vp, MNT_WAIT);
+
 	/*
 	 * Check for allocating external data.
 	 */
@@ -930,6 +937,8 @@ fail:
 	 * slow, running out of disk space is not expected to be a common
 	 * occurence. The error return from fsync is ignored as we already
 	 * have an error to return to the user.
+	 *
+	 * XXX Still have to journal the free below
 	 */
 	(void) ffs_syncvnode(vp, MNT_WAIT);
 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
@@ -985,7 +994,7 @@ fail:
 	 */
 	for (blkp = allociblk; blkp < allocblk; blkp++) {
 		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
-		    ip->i_number);
+		    ip->i_number, NULL);
 	}
 	return (error);
 }
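The two ffs_balloc hunks above share one pattern: reserve journal resources before any allocation state is held, so the journal can be flushed for space without deadlocking against a half-finished allocation. A condensed view of that calling convention (sketch only; softdep_prealloc() itself is implemented elsewhere in the patch and not shown in this excerpt):

	/*
	 * Sketch of the pre-allocation hook's calling pattern at the top
	 * of a balloc entry point.
	 */
	static int
	balloc_entry_sketch(struct vnode *vp, ufs_lbn_t lbn)
	{
		if (lbn < 0)
			return (EFBIG);
		if (DOINGSOFTDEP(vp))
			softdep_prealloc(vp, MNT_WAIT); /* may block for journal space */
		/* ...existing allocation logic continues unchanged... */
		return (0);
	}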
Index: /usr/src/sys/ufs/ffs/ffs_inode.c
===================================================================
--- /usr/src/sys/ufs/ffs/ffs_inode.c	(revision 202614)
+++ /usr/src/sys/ufs/ffs/ffs_inode.c	(working copy)
@@ -92,15 +92,6 @@ ffs_update(vp, waitfor)
 	fs = ip->i_fs;
 	if (fs->fs_ronly)
 		return (0);
-	/*
-	 * Ensure that uid and gid are correct. This is a temporary
-	 * fix until fsck has been changed to do the update.
-	 */
-	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
-	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
-		ip->i_din1->di_ouid = ip->i_uid;	/* XXX */
-		ip->i_din1->di_ogid = ip->i_gid;	/* XXX */
-	}						/* XXX */
 	error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 		(int)fs->fs_bsize, NOCRED, &bp);
 	if (error) {
@@ -232,7 +223,7 @@ ffs_truncate(vp, length, flags, cred, td)
 			if (oldblks[i] == 0)
 				continue;
 			ffs_blkfree(ump, fs, ip->i_devvp, oldblks[i],
-			    sblksize(fs, osize, i), ip->i_number);
+			    sblksize(fs, osize, i), ip->i_number, NULL);
 		}
 	}
 }
@@ -336,6 +327,8 @@ ffs_truncate(vp, length, flags, cred, td)
 	 * zero'ed in case it ever becomes accessible again because
 	 * of subsequent file growth. Directories however are not
 	 * zero'ed as they should grow back initialized to empty.
+	 *
+	 * XXX Still need to manually journal this.
 	 */
 	offset = blkoff(fs, length);
 	if (offset == 0) {
@@ -445,7 +438,7 @@ ffs_truncate(vp, length, flags, cred, td)
 		if (lastiblock[level] < 0) {
 			DIP_SET(ip, i_ib[level], 0);
 			ffs_blkfree(ump, fs, ip->i_devvp, bn,
-			    fs->fs_bsize, ip->i_number);
+			    fs->fs_bsize, ip->i_number, NULL);
 			blocksreleased += nblocks;
 		}
 	}
@@ -464,7 +457,8 @@ ffs_truncate(vp, length, flags, cred, td)
 			continue;
 		DIP_SET(ip, i_db[i], 0);
 		bsize = blksize(fs, ip, i);
-		ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number);
+		ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number,
+		    NULL);
 		blocksreleased += btodb(bsize);
 	}
 	if (lastblock < 0)
@@ -496,7 +490,7 @@ ffs_truncate(vp, length, flags, cred, td)
 			 */
 			bn += numfrags(fs, newspace);
 			ffs_blkfree(ump, fs, ip->i_devvp, bn,
-			    oldspace - newspace, ip->i_number);
+			    oldspace - newspace, ip->i_number, NULL);
 			blocksreleased += btodb(oldspace - newspace);
 		}
 	}
@@ -638,7 +632,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp
 			blocksreleased += blkcount;
 		}
 		ffs_blkfree(ip->i_ump, fs, ip->i_devvp, nb, fs->fs_bsize,
-		    ip->i_number);
+		    ip->i_number, NULL);
 		blocksreleased += nblocks;
 	}
Index: /usr/src/sys/ufs/ffs/ffs_snapshot.c
===================================================================
--- /usr/src/sys/ufs/ffs/ffs_snapshot.c	(revision 202614)
+++ /usr/src/sys/ufs/ffs/ffs_snapshot.c	(working copy)
@@ -582,7 +582,8 @@ loop:
 			len = fragroundup(fs, blkoff(fs, xp->i_size));
 			if (len != 0 && len < fs->fs_bsize) {
 				ffs_blkfree(ump, copy_fs, vp,
-				    DIP(xp, i_db[loc]), len, xp->i_number);
+				    DIP(xp, i_db[loc]), len, xp->i_number,
+				    NULL);
 				blkno = DIP(xp, i_db[loc]);
 				DIP_SET(xp, i_db[loc], 0);
 			}
@@ -598,7 +599,7 @@ loop:
 			DIP_SET(xp, i_db[loc], blkno);
 		if (!error)
 			error = ffs_freefile(ump, copy_fs, vp, xp->i_number,
-			    xp->i_mode);
+			    xp->i_mode, NULL);
 		VOP_UNLOCK(xvp, 0);
 		vdrop(xvp);
 		if (error) {
@@ -700,7 +701,7 @@ out1:
 				    copy_fs, vp, xp->i_number,
-				    xp->i_mode);
+				    xp->i_mode, NULL);
 		}
 		if (error) {
 			fs->fs_snapinum[snaploc] = 0;
@@ -1220,7 +1221,7 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, ex
 			*ip->i_snapblklist++ = lblkno;
 		if (blkno == BLK_SNAP)
 			blkno = blkstofrags(fs, lblkno);
-		ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum);
+		ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL);
 	}
 	return (0);
 }
@@ -1500,7 +1501,7 @@ mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, ex
 			*ip->i_snapblklist++ = lblkno;
 		if (blkno == BLK_SNAP)
 			blkno = blkstofrags(fs, lblkno);
-		ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum);
+		ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL);
 	}
 	return (0);
 }
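Every ffs_blkfree() and ffs_freefile() call site in the three files above gains a trailing NULL. The callee is not shown in this excerpt; a reasonable reading is that the new final parameter carries pending journal dependencies for the freed block or inode, with NULL preserving the old unjournaled behaviour. A sketch of the calling convention under that assumption:

	/*
	 * Sketch: the extended ffs_blkfree() interface as used above.  The
	 * final argument is assumed to hand over journal work (for example
	 * a jnewblk to cancel); every call site in this patch passes NULL.
	 */
	static void
	blkfree_compat_sketch(struct ufsmount *ump, struct fs *fs,
	    struct inode *ip, ufs2_daddr_t blkno, long size)
	{
		ffs_blkfree(ump, fs, ip->i_devvp, blkno, size, ip->i_number,
		    NULL);
	}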
Index: /usr/src/sys/ufs/ffs/fs.h
===================================================================
--- /usr/src/sys/ufs/ffs/fs.h	(revision 202614)
+++ /usr/src/sys/ufs/ffs/fs.h	(working copy)
@@ -340,7 +340,10 @@ struct fs {
 	int32_t	 fs_avgfilesize;	/* expected average file size */
 	int32_t	 fs_avgfpdir;		/* expected # of files per directory */
 	int32_t	 fs_save_cgsize;	/* save real cg size to use fs_bsize */
-	int32_t	 fs_sparecon32[26];	/* reserved for future constants */
+	int32_t	 fs_sujournal;		/* SUJ journal file */
+	int32_t	 fs_sujfree;		/* SUJ free list */
+	ufs_time_t fs_mtime;		/* Last mount or fsck time. */
+	int32_t	 fs_sparecon32[22];	/* reserved for future constants */
 	int32_t	 fs_flags;		/* see FS_ flags below */
 	int32_t	 fs_contigsumsize;	/* size of cluster summary array */
 	int32_t	 fs_maxsymlinklen;	/* max length of an internal symlink */
@@ -414,6 +417,7 @@ CTASSERT(sizeof(struct fs) == 1376);
 #define FS_GJOURNAL	0x0040	/* gjournaled file system */
 #define FS_FLAGS_UPDATED 0x0080	/* flags have been moved to new location */
 #define FS_NFS4ACLS	0x0100	/* file system has NFSv4 ACLs enabled */
+#define FS_SUJ		0x200	/* Filesystem using softupdate journal */
 
 /*
  * Macros to access bits in the fs_active array.
@@ -603,8 +607,32 @@ struct cg {
 	? (fs)->fs_bsize \
 	: (fragroundup(fs, blkoff(fs, (size)))))
-
 /*
+ * Indirect lbns are aligned on NDADDR addresses where single indirects
+ * are the negated address of the lowest lbn reachable, double indirects
+ * are this lbn - 1 and triple indirects are this lbn - 2.  This yields
+ * an unusual bit order to determine level.
+ */
+static inline int
+lbn_level(ufs_lbn_t lbn)
+{
+	if (lbn >= 0)
+		return 0;
+	switch (lbn & 0x3) {
+	case 0:
+		return (0);
+	case 1:
+		break;
+	case 2:
+		return (2);
+	case 3:
+		return (1);
+	default:
+		break;
+	}
+	return (-1);
+}
+/*
  * Number of inodes in a secondary storage block/fragment.
  */
 #define	INOPB(fs)	((fs)->fs_inopb)
@@ -615,6 +643,78 @@ struct cg {
  */
 #define	NINDIR(fs)	((fs)->fs_nindir)
 
+/*
+ * Softdep journal record format.
+ */
+
+#define	JOP_ADDREF	1	/* Add a reference to an inode. */
+#define	JOP_REMREF	2	/* Remove a reference from an inode. */
+#define	JOP_NEWBLK	3	/* Allocate a block. */
+#define	JOP_FREEBLK	4	/* Free a block or a tree of blocks. */
+#define	JOP_MVREF	5	/* Move a reference from one off to another. */
+
+#define	JREC_SIZE	32	/* Record and segment header size. */
+
+#define	SUJ_MIN		(1 * 1024 * 1024)	/* Minimum journal size */
+#define	SUJ_MAX		(64 * SUJ_MIN)		/* Maximum journal size */
+
+/*
+ * Size of the segment record header.  There is at most one for each disk
+ * block and at least one for each filesystem block in the journal.  The
+ * segment header is followed by an array of records.
+ */
+struct jsegrec {
+	uint64_t	jsr_seq;	/* Our sequence number */
+	uint64_t	jsr_oldest;	/* Oldest valid sequence number */
+	uint32_t	jsr_cnt;	/* Count of valid records */
+	uint32_t	jsr_crc;	/* 32bit crc of the valid space */
+	ufs_time_t	jsr_time;	/* timestamp for mount instance */
+};
+
+struct jrefrec {
+	uint32_t	jr_op;
+	ino_t		jr_ino;
+	ino_t		jr_parent;
+	uint16_t	jr_nlink;
+	uint16_t	jr_mode;
+	off_t		jr_diroff;
+	uint64_t	jr_unused;
+};
+
+struct jmvrec {
+	uint32_t	jm_op;
+	ino_t		jm_ino;
+	ino_t		jm_parent;
+	uint16_t	jm_unused;
+	off_t		jm_oldoff;
+	off_t		jm_newoff;
+};
+
+struct jblkrec {
+	uint32_t	jb_op;
+	uint32_t	jb_ino;
+	ufs2_daddr_t	jb_blkno;
+	ufs_lbn_t	jb_lbn;
+	uint16_t	jb_frags;
+	uint16_t	jb_oldfrags;
+	uint32_t	jb_unused;
+};
+
+union jrec {
+	struct	jsegrec rec_jsegrec;
+	struct	jrefrec rec_jrefrec;
+	struct	jmvrec rec_jmvrec;
+	struct	jblkrec rec_jblkrec;
+};
+
+#ifdef CTASSERT
+CTASSERT(sizeof(struct jsegrec) == JREC_SIZE);
+CTASSERT(sizeof(struct jrefrec) == JREC_SIZE);
+CTASSERT(sizeof(struct jmvrec) == JREC_SIZE);
+CTASSERT(sizeof(struct jblkrec) == JREC_SIZE);
+CTASSERT(sizeof(union jrec) == JREC_SIZE);
+#endif
+
 extern int inside[], around[];
 extern u_char *fragtbl[];
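A standalone sanity check of the lbn_level() encoding added to fs.h above. The concrete values assume NDADDR == 12 (as on FreeBSD UFS), so the first single indirect is lbn -12, its double indirect -13 and its triple -14; because NDADDR and NINDIR are both multiples of 4, every single indirect is 0 mod 4, every double is 3 mod 4 and every triple is 2 mod 4. This hypothetical userland test is not part of the patch:

	#include <assert.h>
	#include <stdint.h>

	typedef int64_t ufs_lbn_t;	/* matches the UFS2 definition */

	/* Copy of the patch's encoding for a userland check. */
	static inline int
	lbn_level(ufs_lbn_t lbn)
	{
		if (lbn >= 0)
			return (0);
		switch (lbn & 0x3) {
		case 0:
			return (0);	/* single indirect */
		case 2:
			return (2);	/* triple indirect */
		case 3:
			return (1);	/* double indirect */
		}
		return (-1);		/* case 1: not a valid indirect lbn */
	}

	int
	main(void)
	{
		assert(lbn_level(-12) == 0);	/* single: -NDADDR */
		assert(lbn_level(-13) == 1);	/* double: single - 1 */
		assert(lbn_level(-14) == 2);	/* triple: single - 2 */
		return (0);
	}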
Index: /usr/src/sys/kern/vfs_bio.c
===================================================================
--- /usr/src/sys/kern/vfs_bio.c	(revision 202614)
+++ /usr/src/sys/kern/vfs_bio.c	(working copy)
@@ -216,6 +216,14 @@ SYSCTL_LONG(_vfs, OID_AUTO, notbufdflashes, CTLFLA
 static int bd_request;
 
 /*
+ * Request for the buf daemon to write more buffers than is indicated by
+ * lodirtybuffers.  This may be necessary to push out excess dependencies or
+ * defragment the address space where a simple count of the number of dirty
+ * buffers is insufficient to characterize the demand for flushing them.
+ */
+static int bd_speedupreq;
+
+/*
  * This lock synchronizes access to bd_request.
  */
 static struct mtx bdlock;
@@ -467,12 +475,20 @@ bd_wakeup(int dirtybuflevel)
  * bd_speedup - speedup the buffer cache flushing code
  */
-static __inline
 void
 bd_speedup(void)
 {
+	int needwake;
 
-	bd_wakeup(1);
+	mtx_lock(&bdlock);
+	needwake = 0;
+	if (bd_speedupreq == 0 || bd_request == 0)
+		needwake = 1;
+	bd_speedupreq = 1;
+	bd_request = 1;
+	if (needwake)
+		wakeup(&bd_request);
+	mtx_unlock(&bdlock);
 }
 
 /*
@@ -2120,6 +2136,7 @@ buf_do_flush(struct vnode *vp)
 static void
 buf_daemon()
 {
+	int lodirtysave;
 
 	/*
 	 * This process needs to be suspended prior to shutdown sync.
@@ -2137,7 +2154,11 @@ buf_daemon()
 		mtx_unlock(&bdlock);
 
 		kproc_suspend_check(bufdaemonproc);
-
+		lodirtysave = lodirtybuffers;
+		if (bd_speedupreq) {
+			lodirtybuffers = numdirtybuffers / 2;
+			bd_speedupreq = 0;
+		}
 		/*
 		 * Do the flush.  Limit the amount of in-transit I/O we
 		 * allow to build up, otherwise we would completely saturate
@@ -2149,6 +2170,7 @@ buf_daemon()
 				break;
 			uio_yield();
 		}
+		lodirtybuffers = lodirtysave;
 
 		/*
 		 * Only clear bd_request if we have reached our low water
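With the vfs_bio.c changes above, bd_speedup() is no longer a private inline but an exported entry point (declared in the buf.h hunk below), so a subsystem such as the softdep journal can ask the buf daemon to flush beyond its usual lodirtybuffers target. A sketch of a caller, with journal_space_check_sketch() and JSPACE_LOW as assumed placeholder names not found in the patch:

	#define	JSPACE_LOW	(4 * 1024 * 1024)	/* assumed low-water mark */

	/*
	 * Hypothetical consumer: when free journal space runs low, push the
	 * buf daemon to write dependencies out faster.  While bd_speedupreq
	 * is set the daemon temporarily targets numdirtybuffers / 2 instead
	 * of lodirtybuffers, as implemented above.
	 */
	static void
	journal_space_check_sketch(int journal_free_bytes)
	{
		if (journal_free_bytes < JSPACE_LOW)
			bd_speedup();
	}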
Index: /usr/src/sys/kern/vfs_subr.c
===================================================================
--- /usr/src/sys/kern/vfs_subr.c	(revision 202614)
+++ /usr/src/sys/kern/vfs_subr.c	(working copy)
@@ -2816,6 +2816,7 @@ DB_SHOW_COMMAND(mount, db_show_mount)
 	MNT_FLAG(MNT_FORCE);
 	MNT_FLAG(MNT_SNAPSHOT);
 	MNT_FLAG(MNT_BYFSID);
+	MNT_FLAG(MNT_SOFTDEP);
 #undef MNT_FLAG
 	if (flags != 0) {
 		if (buf[0] != '\0')
Index: /usr/src/sys/sys/mount.h
===================================================================
--- /usr/src/sys/sys/mount.h	(revision 202614)
+++ /usr/src/sys/sys/mount.h	(working copy)
@@ -240,6 +240,7 @@ void __mnt_vnode_markerfree(struct vnode
 #define	MNT_NOCLUSTERR	0x40000000	/* disable cluster read */
 #define	MNT_NOCLUSTERW	0x80000000	/* disable cluster write */
 #define	MNT_NFS4ACLS	0x00000010
+#define	MNT_SUJ		0x00000020	/* softdep journaling */
 
 /*
  * NFS export related mount flags.
@@ -275,7 +276,8 @@ void __mnt_vnode_markerfree(struct vnode
 			MNT_ROOTFS	| MNT_NOATIME	| MNT_NOCLUSTERR| \
 			MNT_NOCLUSTERW	| MNT_SUIDDIR	| MNT_SOFTDEP	| \
 			MNT_IGNORE	| MNT_EXPUBLIC	| MNT_NOSYMFOLLOW | \
-			MNT_GJOURNAL	| MNT_MULTILABEL | MNT_ACLS | MNT_NFS4ACLS)
+			MNT_GJOURNAL	| MNT_MULTILABEL | MNT_ACLS	| \
+			MNT_NFS4ACLS	| MNT_SUJ)
 
 /* Mask of flags that can be updated. */
 #define	MNT_UPDATEMASK	(MNT_NOSUID	| MNT_NOEXEC	| \
Index: /usr/src/sys/sys/buf.h
===================================================================
--- /usr/src/sys/sys/buf.h	(revision 202614)
+++ /usr/src/sys/sys/buf.h	(working copy)
@@ -493,6 +493,7 @@ int	bufwait(struct buf *);
 int	bufwrite(struct buf *);
 void	bufdone(struct buf *);
 void	bufdone_finish(struct buf *);
+void	bd_speedup(void);
 int	cluster_read(struct vnode *, u_quad_t, daddr_t, long,
 	    struct ucred *, long, int, struct buf **);
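Finally, a sketch of how the new on-disk FS_SUJ flag and in-core MNT_SUJ flag introduced above would likely be paired at mount time; the actual mount-path hunk is not in this excerpt, so this is an assumption about usage rather than patch code:

	/*
	 * Hypothetical mount-time pairing: a superblock marked FS_SUJ causes
	 * the mount point to advertise MNT_SUJ, which db_show_mount and
	 * MNT_VISFLAGMASK above already know how to display and preserve.
	 */
	static void
	suj_mount_flag_sketch(struct mount *mp, struct fs *fs)
	{
		if ((fs->fs_flags & FS_SUJ) != 0) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_SUJ;
			MNT_IUNLOCK(mp);
		}
	}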