GENERIC HEAD from 2009-12-18 22:14:28 UTC, r200709M, vmcore.16 KDB: debugger backends: ddb KDB: current backend: ddb Copyright (c) 1992-2009 The FreeBSD Project. Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994 The Regents of the University of California. All rights reserved. FreeBSD is a registered trademark of The FreeBSD Foundation. FreeBSD 9.0-CURRENT #0 r200709M: Sat Dec 19 09:59:51 CET 2009 pho@crashbox.osted.lan:/usr/src/sys/i386/compile/PHO i386 WARNING: WITNESS option enabled, expect reduced performance. WARNING: DIAGNOSTIC option enabled, expect reduced performance. Timecounter "i8254" frequency 1193182 Hz quality 0 CPU: Intel(R) XEON(TM) CPU 1.80GHz (1799.81-MHz 686-class CPU) Origin = "GenuineIntel" Id = 0xf24 Stepping = 4 Features=0x3febfbff real memory = 1073741824 (1024 MB) avail memory = 1031368704 (983 MB) : Trying to mount root from ufs:/dev/ad0s1a WARNING: / was not properly dismounted Entropy harvesting: interrupts ethernet point_to_point kickstart. /dev/ad0s1a: 4393 files, 237202 used, 269285 free (645 frags, 33580 blocks, 0.1% fragmentation) /dev/ad0s1f: 1936 files, 56706 used, 449781 free (501 frags, 56160 blocks, 0.1% fragmentation) SU+J Checking /dev/ad0s1e Reading 67108864 byte journal from inode 4. fsck_ufs: Invalid journal record size 323584, avail 16384 /dev/ad0s1d: 435082 files, 2232453 used, 2844626 free (51394 frags, 349154 blocks, 1.0% fragmentation) /dev/ad0s1g: 394763 files, 4516088 used, 21375257 free (23393 frags, 2668983 blocks, 0.1% fragmentation) THE FOLLOWING FILE SYSTEM HAD AN UNEXPECTED INCONSISTENCY: ufs: /dev/ad0s1e (/tmp) Unknown error; help! ERROR: ABORTING BOOT (sending SIGTERM to parent)! 
Dec 19 10:28:56 init: /bin/sh on /etc/rc terminated abnormally, going to single user mode Enter full pathname of shell or RETURN for /bin/sh: # fsck -y ** /dev/ad0s1a ** Last Mounted on / ** Root file system ** Phase 1 - Check Blocks and Sizes ** Phase 2 - Check Pathnames ** Phase 3 - Check Connectivity ** Phase 4 - Check Reference Counts ** Phase 5 - Check Cyl groups 4393 files, 237202 used, 269285 free (645 frags, 33580 blocks, 0.1% fragmentation) ***** FILE SYSTEM IS CLEAN ***** ** /dev/ad0s1f ** Last Mounted on /home ** Phase 1 - Check Blocks and Sizes ** Phase 2 - Check Pathnames ** Phase 3 - Check Connectivity ** Phase 4 - Check Reference Counts ** Phase 5 - Check Cyl groups 1936 files, 56706 used, 449781 free (501 frags, 56160 blocks, 0.1% fragmentation) ***** FILE SYSTEM IS CLEAN ***** ** /dev/ad0s1e ** Last Mounted on /tmp ** Phase 1 - Check Blocks and Sizes ** Phase 2 - Check Pathnames ** Phase 3 - Check Connectivity ** Phase 4 - Check Reference Counts ** Phase 5 - Check Cyl groups FREE BLK COUNT(S) WRONG IN SUPERBLK SALVAGE? yes SUMMARY INFORMATION BAD SALVAGE? yes BLK(S) MISSING IN BIT MAPS SALVAGE? 
yes 2059 files, 32817 used, 1996214 free (62 frags, 249519 blocks, 0.0% fragmentation) ***** FILE SYSTEM MARKED CLEAN ***** ***** FILE SYSTEM WAS MODIFIED ***** ** /dev/ad0s1d ** Last Mounted on /usr ** Phase 1 - Check Blocks and Sizes ** Phase 2 - Check Pathnames ** Phase 3 - Check Connectivity ** Phase 4 - Check Reference Counts ** Phase 5 - Check Cyl groups 435082 files, 2232453 used, 2844626 free (51394 frags, 349154 blocks, 1.0% fragmentation) ***** FILE SYSTEM IS CLEAN ***** ** /dev/ad0s1g ** Last Mounted on /var ** Phase 1 - Check Blocks and Sizes ** Phase 2 - Check Pathnames ** Phase 3 - Check Connectivity ** Phase 4 - Check Reference Counts ** Phase 5 - Check Cyl groups 394763 files, 4516088 used, 21375257 free (23393 frags, 2668983 blocks, 0.1% fragmentation) ***** FILE SYSTEM IS CLEAN ***** # exit Entropy harvesting: interrupts ethernet point_to_point kickstart. Fast boot: skipping disk checks. lock order reversal: 1st 0xd8502620 bufwait (bufwait) @ kern/vfs_bio.c:2559 2nd 0xc483a800 dirhash (dirhash) @ ufs/ufs/ufs_dirhash.c:283 KDB: stack backtrace: db_trace_self_wrapper(c0ca2f2a,e6d16880,c08d1d85,c08c286b,c0ca5ece,...) at db_trace_self_wrapper+0x26 kdb_backtrace(c08c286b,c0ca5ece,c4538030,c453b290,e6d168dc,...) at kdb_backtrace+0x29 _witness_debugger(c0ca5ece,c483a800,c0cc9077,c453b290,c0cc8d1d,...) at _witness_debugger+0x25 witness_checkorder(c483a800,9,c0cc8d14,11b,0,...) at witness_checkorder+0x839 _sx_xlock(c483a800,0,c0cc8d14,11b,c4b6a7b4,...) at _sx_xlock+0x85 ufsdirhash_acquire(d85025c0,e6d16a1c,164,d8b654ac,e6d169a8,...) at ufsdirhash_acquire+0x48 ufsdirhash_add(c4b6a7b4,e6d16a1c,4ac,e6d16994,e6d16998,...) at ufsdirhash_add+0x13 ufs_direnter(c4c3e414,c4b786cc,e6d16a1c,e6d16c00,d8505000,...) at ufs_direnter+0x669 ufs_mkdir(e6d16c28,c0cde6d7,0,0,e6d16b6c,...) at ufs_mkdir+0x981 VOP_MKDIR_APV(c0daeb60,e6d16c28,e6d16c00,e6d16b6c,0,...) at VOP_MKDIR_APV+0xc5 kern_mkdirat(c481bd80,ffffff9c,bfbfef5a,0,1ff,...) 
at kern_mkdirat+0x21b kern_mkdir(c481bd80,bfbfef5a,0,1ff,e6d16d2c,...) at kern_mkdir+0x2e mkdir(c481bd80,e6d16cf8,8,c0ca6794,c0d8ce00,...) at mkdir+0x29 syscall(e6d16d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (136, FreeBSD ELF32, mkdir), eip = 0x2816a5c3, esp = 0xbfbfed6c, ebp = 0xbfbfee38 --- fxp0: link state changed to UP Starting Network: lo0 fxp0. add net default: gateway 192.168.1.1 Additional ABI support: linux. Starting mountd. Configuring syscons: keymap blanktime. Local package initialization:lock order reversal: 1st 0xc4c3e9dc ufs (ufs) @ kern/vfs_subr.c:2083 2nd 0xd850f620 bufwait (bufwait) @ ufs/ffs/ffs_softdep.c:9536 3rd 0xc4d1a1b4 ufs (ufs) @ kern/vfs_subr.c:2083 KDB: stack backtrace: db_trace_self_wrapper(c0ca2f2a,e6de585c,c08d1d85,c08c286b,c0ca5ee7,...) at db_trace_self_wrapper+0x26 kdb_backtrace(c08c286b,c0ca5ee7,c4538030,c453b228,e6de58b8,...) at kdb_backtrace+0x29 _witness_debugger(c0ca5ee7,c4d1a1b4,c0c9869b,c453b228,c0cad0b7,...) at _witness_debugger+0x25 witness_checkorder(c4d1a1b4,9,c0cad0ae,823,0,...) at witness_checkorder+0x839 __lockmgr_args(c4d1a1b4,80100,c4d1a220,0,0,...) at __lockmgr_args+0x824 ffs_lock(e6de59d8,c08d1b2b,c0cac595,80100,c4d1a15c,...) at ffs_lock+0xa1 VOP_LOCK1_APV(c0daeb60,e6de59d8,c4c019a4,c0dc9520,c4d1a15c,...) at VOP_LOCK1_APV+0xb5 _vn_lock(c4d1a15c,80100,c0cad0ae,823,4,...) at _vn_lock+0x78 vget(c4d1a15c,80100,c4c01900,50,0,...) at vget+0xbb vfs_hash_get(c4c3d2d4,50800,80000,c4c01900,e6de5b38,...) at vfs_hash_get+0xed ffs_vgetf(c4c3d2d4,50800,80000,e6de5b38,1,...) at ffs_vgetf+0x49 softdep_sync_metadata(c4c3e984,0,c0cc8960,146,0,...) at softdep_sync_metadata+0x583 ffs_syncvnode(c4c3e984,1,c4c01900,51d,c4c3e828,...) at ffs_syncvnode+0x3e2 ffs_sync(c4c3d2d4,1,c0cac8ac,4f9,80,...) at ffs_sync+0x26f dounmount(c4c3d2d4,8000000,c4c01900,47e,65485f12,...) at dounmount+0x44e unmount(c4c01900,e6de5cf8,8,c4c01900,c0d8c188,...) 
at unmount+0x2ff syscall(e6de5d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (22, FreeBSD ELF32, unmount), eip = 0x280da13f, esp = 0xbfbfe68c, ebp = 0xbfbfe758 --- lock order reversal: 1st 0xc4b68c94 ufs (ufs) @ kern/vfs_mount.c:1204 2nd 0xc4b76724 devfs (devfs) @ ufs/ffs/ffs_softdep.c:1271 KDB: stack backtrace: db_trace_self_wrapper(c0ca2f2a,e6de59dc,c08d1d85,c08c286b,c0ca5ece,...) at db_trace_self_wrapper+0x26 kdb_backtrace(c08c286b,c0ca5ece,c453b228,c453b0f0,e6de5a38,...) at kdb_backtrace+0x29 _witness_debugger(c0ca5ece,c4b76724,c0c9459d,c453b0f0,c0cc60f0,...) at _witness_debugger+0x25 witness_checkorder(c4b76724,9,c0cc60e7,4f7,c4b76790,...) at witness_checkorder+0x839 __lockmgr_args(c4b76724,80400,c4b76790,0,0,...) at __lockmgr_args+0x824 vop_stdlock(e6de5b54,c0c9e273,df,80400,c4b766cc,...) at vop_stdlock+0x65 VOP_LOCK1_APV(c0d883c0,e6de5b54,0,c0dc9520,c4b766cc,...) at VOP_LOCK1_APV+0xb5 _vn_lock(c4b766cc,80400,c0cc60e7,4f7,c4c3d2d4,...) at _vn_lock+0x78 softdep_flushworklist(c4c3d2d4,e6de5c00,c4c01900,52b,c4c3e828,...) at softdep_flushworklist+0x47 ffs_sync(c4c3d2d4,1,c0cac8ac,4f9,80,...) at ffs_sync+0x2fd dounmount(c4c3d2d4,8000000,c4c01900,47e,65485f12,...) at dounmount+0x44e unmount(c4c01900,e6de5cf8,8,c4c01900,c0d8c188,...) at unmount+0x2ff syscall(e6de5d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (22, FreeBSD ELF32, unmount), eip = 0x280da13f, esp = 0xbfbfe68c, ebp = 0xbfbfe758 --- ** /dev/ad0s1e ** Last Mounted on /tmp ** Phase 1 - Check Blocks and Sizes ** Phase 2 - Check Pathnames ** Phase 3 - Check Connectivity ** Phase 4 - Check Reference Counts ** Phase 5 - Check Cyl groups 2059 files, 32817 used, 1996214 free (62 frags, 249519 blocks, 0.0% fragmentation) ***** FILE SYSTEM IS CLEAN ***** usage: kill [-s signal_name] pid ... kill -l [exit_status] kill -signal_name pid ... kill -signal_number pid ... fsck -y /tmp watchdogd. 
Sat Dec 19 10:32:22 CET 2009 Dec 19 10:32:31 crashbox su: pho to root on /dev/pts/0 Expensive timeout(9) function: 0xc08a01b0(0xc4d12aa0) 2.765448930 s panic: Bad link elm 0xc6964280 next->prev != elm cpuid = 3 KDB: enter: panic [thread pid 3 tid 100015 ] Stopped at kdb_enter+0x3a: movl $0,kdb_why db> run pho db:0:pho> bt Tracing pid 3 tid 100015 td 0xc457e6c0 kdb_enter(c0c9faeb,c0c9faeb,c0c39b08,c4302b54,3,...) at kdb_enter+0x3a panic(c0c39b08,c6964280,0,c70afe80,0,...) at panic+0x136 handle_allocindir_partdone(c0f769ac,0,c0cc60e7,1c3d,c0dfe7b4,...) at handle_allocindir_partdone+0x45 softdep_disk_write_complete(d85dbe80,0,c0cc84f6,6d5,c4b767c4,...) at softdep_disk_write_complete+0xd4b ffs_backgroundwritedone(d85dbe80,c1879000,d85dbe80,c4aa3980,c4302c98,...) at ffs_backgroundwritedone+0xa3 bufdone(d85dbe80,c454e95c) at bufdone+0x53 g_vfs_done(c4aa3980,0,c0caaca8,c16,c4aa3980,...) at g_vfs_done+0x85 biodone(c4aa3980,c0dfc268,24c,c0c9544e,0,...) at biodone+0xa5 g_io_schedule_up(c457e6c0,0,c0c96b16,5d,0,...) at g_io_schedule_up+0xc7 g_up_procbody(0,c4302d38,c0c9afc8,343,c457a2a8,...) 
at g_up_procbody+0x8d fork_exit(c082bd50,0,c4302d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4302d70, ebp = 0 --- db:0:bt> show allpcpu Current CPU: 3 cpuid = 0 dynamic pcpu = 0x65b400 curthread = 0xc457c480: pid 11 "idle: cpu0" curpcb = 0xc42e4d90 fpcurthread = none idlethread = 0xc457c480: pid 11 "idle: cpu0" APIC ID = 0 currentldt = 0x50 spin locks held: cpuid = 1 dynamic pcpu = 0x34f1400 curthread = 0xc4b46900: pid 1181 "top" curpcb = 0xe6d08d90 fpcurthread = none idlethread = 0xc457c6c0: pid 11 "idle: cpu1" APIC ID = 1 currentldt = 0x50 spin locks held: cpuid = 2 dynamic pcpu = 0x34f4400 curthread = 0xc529a6c0: pid 9011 "rw" curpcb = 0xe7101d90 fpcurthread = none idlethread = 0xc457c900: pid 11 "idle: cpu2" APIC ID = 6 currentldt = 0x50 spin locks held: cpuid = 3 dynamic pcpu = 0x34f7400 curthread = 0xc457e6c0: pid 3 "g_up" curpcb = 0xc4302d90 fpcurthread = none idlethread = 0xc457cb40: pid 11 "idle: cpu3" APIC ID = 7 currentldt = 0x50 spin locks held: db:0:allpcpu> show alllocks Process 9021 (rw) thread 0xc68e7480 (100201) exclusive lockmgr bufwait (bufwait) r = 0 (0xd861ea80) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87290e0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87642a0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd86a75c0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd877e5e0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd86e22a0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8765620) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8707220) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc53f7b38) locked @ kern/vfs_vnops.c:607 Process 9020 (rw) thread 0xc49a8480 (100232) exclusive lockmgr bufwait (bufwait) r = 0 (0xd872bb20) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait 
(bufwait) r = 0 (0xd8733500) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87a8840) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd86f8820) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8738300) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87934a0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd877bd40) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc4ab0c94) locked @ kern/vfs_vnops.c:607 Process 9017 (rw) thread 0xc4b46240 (100072) exclusive lockmgr bufwait (bufwait) r = 0 (0xd87722e0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8745cc0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd84f6660) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd85222c0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd871dae0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd84f3400) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87b3e40) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc55ca1b4) locked @ kern/vfs_vnops.c:607 Process 9016 (rw) thread 0xc5192480 (100160) exclusive lockmgr bufwait (bufwait) r = 0 (0xd8741d60) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87a9a20) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8705000) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8728da0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd85ed180) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87b10c0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd851e6a0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc62b91b4) locked @ kern/vfs_vnops.c:607 Process 9015 (rw) thread 
0xc529a240 (100319) exclusive lockmgr bufwait (bufwait) r = 0 (0xd8581a40) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd85348e0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd854bb60) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd871a880) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8755560) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8782f00) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87b5360) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc5feb5c8) locked @ kern/vfs_vnops.c:607 Process 9014 (rw) thread 0xc5aa4240 (100259) exclusive lockmgr bufwait (bufwait) r = 0 (0xd87a62e0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8757ac0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87ac120) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd876e860) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc4cafdf0) locked @ kern/vfs_vnops.c:607 Process 9012 (rw) thread 0xc529a480 (100318) exclusive lockmgr bufwait (bufwait) r = 0 (0xd86945e0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd86d0160) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd84fc980) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8741200) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8523cc0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8756a80) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87b03c0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd878aa80) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc54e85c8) locked @ kern/vfs_vnops.c:607 Process 9011 (rw) thread 0xc529a6c0 (100317) exclusive lockmgr 
bufwait (bufwait) r = 0 (0xd8697840) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd85157a0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87b7720) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8775880) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd879b6a0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8791de0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc5fe7058) locked @ kern/vfs_vnops.c:607 Process 9008 (rw) thread 0xc4a7e6c0 (100329) exclusive lockmgr ufs (ufs) r = 0 (0xc5fe75c8) locked @ kern/vfs_vnops.c:607 Process 9006 (rw) thread 0xc5191900 (100166) exclusive lockmgr bufwait (bufwait) r = 0 (0xd8501100) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc54e4724) locked @ kern/vfs_vnops.c:607 Process 9005 (rw) thread 0xc4afa900 (100055) exclusive lockmgr bufwait (bufwait) r = 0 (0xd8710ae0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8505d60) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd876f560) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8743c40) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd873ee40) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8777c40) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd855a220) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc4caf724) locked @ kern/vfs_vnops.c:607 Process 9003 (rw) thread 0xc4c3b6c0 (100099) exclusive lockmgr bufwait (bufwait) r = 0 (0xd8781360) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc55cab38) locked @ kern/vfs_vnops.c:607 Process 9002 (rw) thread 0xc4bfe000 (100085) exclusive lockmgr bufwait (bufwait) r = 0 (0xd876a760) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8734f00) locked @ 
kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd871b240) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87775c0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd861e260) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8770dc0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8731e40) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc53cadf0) locked @ kern/vfs_vnops.c:607 Process 9000 (rw) thread 0xc4be7000 (100210) exclusive lockmgr bufwait (bufwait) r = 0 (0xd87b70a0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd85244e0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8792940) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8548280) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87628a0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8727880) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc573c1b4) locked @ kern/vfs_vnops.c:607 Process 8999 (rw) thread 0xc4b46b40 (100061) exclusive lockmgr bufwait (bufwait) r = 0 (0xd87671c0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8777900) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd872dba0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87a8d20) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc506946c) locked @ kern/vfs_vnops.c:607 Process 8997 (rw) thread 0xc4b43000 (100198) exclusive lockmgr bufwait (bufwait) r = 0 (0xd871e980) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8528e00) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8736de0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd878bfa0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr 
bufwait (bufwait) r = 0 (0xd85130a0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8798920) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd872c340) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87317c0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc4caf5c8) locked @ kern/vfs_vnops.c:607 Process 8995 (rw) thread 0xc4d1d240 (100113) exclusive lockmgr bufwait (bufwait) r = 0 (0xd8793160) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc5dd7724) locked @ kern/vfs_vnops.c:607 Process 8993 (rw) thread 0xc529d240 (100237) exclusive lockmgr bufwait (bufwait) r = 0 (0xd8633fc0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8591960) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8508c80) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd875a9e0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd86dcc80) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8508600) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc4caf880) locked @ kern/vfs_vnops.c:607 Process 8992 (rw) thread 0xc50cfb40 (100309) exclusive lockmgr ufs (ufs) r = 0 (0xc515e724) locked @ kern/vfs_vnops.c:607 Process 8989 (rw) thread 0xc4bfe240 (100084) exclusive lockmgr ufs (ufs) r = 0 (0xc5dd7058) locked @ kern/vfs_vnops.c:607 Process 8988 (rw) thread 0xc4a9a480 (100323) exclusive lockmgr bufwait (bufwait) r = 0 (0xd86aab60) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8737600) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd878f880) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd877b040) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8507c40) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8760680) locked @ 
kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc5e071b4) locked @ kern/vfs_vnops.c:607 Process 8985 (rw) thread 0xc4be7240 (100209) exclusive lockmgr ufs (ufs) r = 0 (0xc54ba1b4) locked @ kern/vfs_vnops.c:607 Process 8984 (rw) thread 0xc529db40 (100262) exclusive lockmgr bufwait (bufwait) r = 0 (0xd8520f40) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc5dd71b4) locked @ kern/vfs_vnops.c:607 Process 8983 (rw) thread 0xc50ccd80 (100290) exclusive lockmgr bufwait (bufwait) r = 0 (0xd86f7640) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc4caf058) locked @ kern/vfs_vnops.c:607 Process 8982 (rw) thread 0xc4a7e240 (100331) exclusive lockmgr bufwait (bufwait) r = 0 (0xd86361e0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8775bc0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd878a8e0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8757c60) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd877ee00) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87a0e60) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd86f4f40) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc54ba058) locked @ kern/vfs_vnops.c:607 Process 8981 (rw) thread 0xc481bd80 (100067) exclusive lockmgr bufwait (bufwait) r = 0 (0xd8744fc0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8747ee0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8777280) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8711e60) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8503e80) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd873ba40) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd857ba60) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait 
(bufwait) r = 0 (0xd877cf20) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc607a880) locked @ kern/vfs_vnops.c:607 Process 8980 (rw) thread 0xc4c3cb40 (100138) exclusive lockmgr ufs (ufs) r = 0 (0xc5dd7880) locked @ kern/vfs_vnops.c:607 Process 8979 (rw) thread 0xc5aa5900 (100281) exclusive lockmgr bufwait (bufwait) r = 0 (0xd86ccf00) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87aeea0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8792e20) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd876dea0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8754860) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8506da0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc6611880) locked @ kern/vfs_vnops.c:607 Process 8976 (rw) thread 0xc47d5b40 (100273) exclusive lockmgr bufwait (bufwait) r = 0 (0xd87af520) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd878a5a0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8505060) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87937e0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd8766180) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd86f6c80) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd87609c0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr bufwait (bufwait) r = 0 (0xd85241a0) locked @ kern/vfs_bio.c:2559 exclusive lockmgr ufs (ufs) r = 0 (0xc62b946c) locked @ kern/vfs_vnops.c:607 Process 1181 (top) thread 0xc4b46900 (100065) exclusive sleep mutex vm object (standard object) r = 0 (0xc50d9bb0) locked @ vm/vm_object.c:482 shared sx user map (user map) r = 0 (0xc4580ec8) locked @ vm/vm_map.c:3532 shared sx sysctl lock (sysctl lock) r = 0 (0xc0dfe564) locked @ kern/kern_sysctl.c:1521 exclusive sx 
sysctl mem (sysctl mem) r = 0 (0xc0dfe578) locked @ kern/kern_sysctl.c:1513 Process 1176 (sshd) thread 0xc4afa6c0 (100056) exclusive sx so_rcv_sx (so_rcv_sx) r = 0 (0xc4c62a34) locked @ kern/uipc_sockbuf.c:148 Process 1175 (sshd) thread 0xc4b47000 (100057) exclusive sx so_rcv_sx (so_rcv_sx) r = 0 (0xc4ddc3c4) locked @ kern/uipc_sockbuf.c:148 Process 1174 (sshd) thread 0xc4b46d80 (100058) exclusive sx so_rcv_sx (so_rcv_sx) r = 0 (0xc4bf06fc) locked @ kern/uipc_sockbuf.c:148 Process 1108 (sshd) thread 0xc481b6c0 (100088) exclusive sx so_rcv_sx (so_rcv_sx) r = 0 (0xc4bf0560) locked @ kern/uipc_sockbuf.c:148 Process 3 (g_up) thread 0xc457e6c0 (100015) exclusive sleep mutex Softdep Lock (Softdep Lock) r = 0 (0xc0f769ac) locked @ ufs/ffs/ffs_softdep.c:7229 db:0:alllocks> show lockedvnods Locked vnodes 0xc6611828: tag ufs, type VREG usecount 1, writecount 1, refcount 144 mountedhere 0 flags () v_object 0xc5df5e58 ref 0 pages 4588 lock type ufs: EXCL by thread 0xc5aa5900 (pid 8979) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 447492, on dev ad0s1e 0xc607a828: tag ufs, type VREG usecount 1, writecount 1, refcount 197 mountedhere 0 flags () v_object 0xc7401220 ref 0 pages 4912 lock type ufs: EXCL by thread 0xc481bd80 (pid 8981) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 471044, on dev ad0s1e 0xc5dd7828: tag ufs, type VREG usecount 1, writecount 1, refcount 234 mountedhere 0 flags () v_object 
0xc618b110 ref 0 pages 5024 lock type ufs: EXCL by thread 0xc4c3cb40 (pid 8980) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 447493, on dev ad0s1e 0xc54ba000: tag ufs, type VREG usecount 1, writecount 1, refcount 166 mountedhere 0 flags () v_object 0xc56ac4c8 ref 0 pages 4792 lock type ufs: EXCL by thread 0xc4a7e240 (pid 8982) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 471045, on dev ad0s1e 0xc5dd715c: tag ufs, type VREG usecount 1, writecount 1, refcount 205 mountedhere 0 flags () v_object 0xc65df330 ref 0 pages 4596 lock type ufs: EXCL by thread 0xc529db40 (pid 8984) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 494596, on dev ad0s1e 0xc4caf000: tag ufs, type VREG usecount 1, writecount 1, refcount 141 mountedhere 0 flags () v_object 0xc608c550 ref 0 pages 4700 lock type ufs: EXCL by thread 0xc50ccd80 (pid 8983) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at 
syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 494597, on dev ad0s1e 0xc54ba15c: tag ufs, type VREG usecount 1, writecount 1, refcount 188 mountedhere 0 flags () v_object 0xc5e66dd0 ref 0 pages 5188 lock type ufs: EXCL by thread 0xc4be7240 (pid 8985) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 518148, on dev ad0s1e 0xc5dd7000: tag ufs, type VREG usecount 1, writecount 1, refcount 231 mountedhere 0 flags () v_object 0xc65df4c8 ref 0 pages 5068 lock type ufs: EXCL by thread 0xc4bfe240 (pid 8989) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 47108, on dev ad0s1e 0xc5dd76cc: tag ufs, type VREG usecount 1, writecount 1, refcount 146 mountedhere 0 flags () v_object 0xc618baa0 ref 0 pages 4532 lock type ufs: EXCL by thread 0xc4d1d240 (pid 8995) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 518150, on dev ad0s1e 0xc5e0715c: tag ufs, type VREG usecount 1, writecount 1, refcount 194 mountedhere 0 flags () v_object 0xc633ebb0 ref 0 pages 4988 lock type ufs: EXCL by thread 0xc4a9a480 (pid 8988) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 
0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 23557, on dev ad0s1e 0xc573c15c: tag ufs, type VREG usecount 1, writecount 1, refcount 164 mountedhere 0 flags () v_object 0xc72fb770 ref 0 pages 4564 lock type ufs: EXCL by thread 0xc4be7000 (pid 9000) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 70661, on dev ad0s1e 0xc4caf570: tag ufs, type VREG usecount 1, writecount 1, refcount 167 mountedhere 0 flags () v_object 0xc608c3b8 ref 0 pages 4952 lock type ufs: EXCL by thread 0xc4b43000 (pid 8997) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 47110, on dev ad0s1e 0xc515e6cc: tag ufs, type VREG usecount 1, writecount 1, refcount 160 mountedhere 0 flags () v_object 0xc56aca18 ref 0 pages 4484 lock type ufs: EXCL by thread 0xc50cfb40 (pid 8992) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 447494, on dev ad0s1e 0xc4caf828: tag ufs, type VREG usecount 1, writecount 1, refcount 164 mountedhere 0 flags () v_object 0xc608ccc0 ref 0 pages 
4836 lock type ufs: EXCL by thread 0xc529d240 (pid 8993) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 471046, on dev ad0s1e 0xc5069414: tag ufs, type VREG usecount 1, writecount 1, refcount 178 mountedhere 0 flags () v_object 0xc56ac440 ref 0 pages 4888 lock type ufs: EXCL by thread 0xc4b46b40 (pid 8999) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 70662, on dev ad0s1e 0xc53cad98: tag ufs, type VREG usecount 1, writecount 1, refcount 205 mountedhere 0 flags () v_object 0xc4e0cc38 ref 0 pages 4784 lock type ufs: EXCL by thread 0xc4bfe000 (pid 9002) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 94213, on dev ad0s1e 0xc54e46cc: tag ufs, type VREG usecount 1, writecount 1, refcount 204 mountedhere 0 flags () v_object 0xc61caaa0 ref 0 pages 4988 lock type ufs: EXCL by thread 0xc5191900 (pid 9006) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at 
Xint0x80_syscall+0x20 ino 117765, on dev ad0s1e 0xc5fe7570: tag ufs, type VREG usecount 1, writecount 1, refcount 163 mountedhere 0 flags () v_object 0xc4dfdc38 ref 0 pages 4912 lock type ufs: EXCL by thread 0xc4a7e6c0 (pid 9008) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 141316, on dev ad0s1e 0xc4caf6cc: tag ufs, type VREG usecount 1, writecount 1, refcount 194 mountedhere 0 flags () v_object 0xc608cd48 ref 0 pages 4968 lock type ufs: EXCL by thread 0xc4afa900 (pid 9005) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 117766, on dev ad0s1e 0xc5feb570: tag ufs, type VREG usecount 1, writecount 1, refcount 188 mountedhere 0 flags () v_object 0xc4d0bbb0 ref 0 pages 4884 lock type ufs: EXCL by thread 0xc529a240 (pid 9015) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 447495, on dev ad0s1e 0xc55caae0: tag ufs, type VREG usecount 1, writecount 1, refcount 167 mountedhere 0 flags () v_object 0xc5afa4c8 ref 0 pages 5152 lock type ufs: EXCL by thread 0xc4c3b6c0 (pid 9003) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 
0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 94214, on dev ad0s1e 0xc5fe7000: tag ufs, type VREG usecount 1, writecount 1, refcount 204 mountedhere 0 flags () v_object 0xc5293990 ref 0 pages 4824 lock type ufs: EXCL by thread 0xc529a6c0 (pid 9011) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 164869, on dev ad0s1e 0xc55ca15c: tag ufs, type VREG usecount 1, writecount 1, refcount 163 mountedhere 0 flags () v_object 0xc5713330 ref 0 pages 4548 lock type ufs: EXCL by thread 0xc4b46240 (pid 9017) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 494599, on dev ad0s1e 0xc54e8570: tag ufs, type VREG usecount 1, writecount 1, refcount 166 mountedhere 0 flags () v_object 0xc56ac2a8 ref 0 pages 4576 lock type ufs: EXCL by thread 0xc529a480 (pid 9012) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 164870, on dev ad0s1e 0xc4cafd98: tag ufs, type VREG usecount 1, writecount 1, refcount 160 mountedhere 0 flags () v_object 0xc608c660 ref 0 pages 4904 lock type ufs: EXCL by 
thread 0xc5aa4240 (pid 9014) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 423942, on dev ad0s1e 0xc53f7ae0: tag ufs, type VREG usecount 1, writecount 1, refcount 174 mountedhere 0 flags () v_object 0xc5cdc198 ref 0 pages 4856 lock type ufs: EXCL by thread 0xc68e7480 (pid 9021) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 70663, on dev ad0s1e 0xc62b915c: tag ufs, type VREG usecount 1, writecount 1, refcount 180 mountedhere 0 flags () v_object 0xc67ff880 ref 0 pages 4924 lock type ufs: EXCL by thread 0xc5192480 (pid 9016) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 471047, on dev ad0s1e 0xc4ab0c3c: tag ufs, type VREG usecount 1, writecount 1, refcount 165 mountedhere 0 flags () v_object 0xc65b5c38 ref 0 pages 4976 lock type ufs: EXCL by thread 0xc49a8480 (pid 9020) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 47111, 
on dev ad0s1e 0xc62b9414: tag ufs, type VREG usecount 1, writecount 1, refcount 170 mountedhere 0 flags () v_object 0xc4f42220 ref 0 pages 4448 lock type ufs: EXCL by thread 0xc47d5b40 (pid 8976) #0 0xc087787e at __lockmgr_args+0xc1e #1 0xc0ad4b01 at ffs_lock+0xa1 #2 0xc0be51b5 at VOP_LOCK1_APV+0xb5 #3 0xc092e478 at _vn_lock+0x78 #4 0xc092f7e6 at vn_write+0x156 #5 0xc08d4475 at dofilewrite+0x95 #6 0xc08d5bc8 at kern_writev+0x58 #7 0xc08d5cdf at write+0x4f #8 0xc0bcc364 at syscall+0x2b4 #9 0xc0bae6a0 at Xint0x80_syscall+0x20 ino 164871, on dev ad0s1e db:0:lockedvnods> show mount 0xc4ab9b50 /dev/ad0s1a on / (ufs) 0xc4aba000 devfs on /dev (devfs) 0xc4c3d5a8 /dev/ad0s1f on /home (ufs) 0xc4c3d000 /dev/ad0s1d on /usr (ufs) 0xc4abab50 /dev/ad0s1g on /var (ufs) 0xc4e19b50 /dev/ad0s1e on /tmp (ufs) More info: show mount db:0:mount> ps pid ppid pgrp uid state wmesg wchan cmd 9337 1192 1191 1001 S nanslp 0xc0dfe6c4 sleep 9021 8974 8973 1001 R+ rw 9020 8974 8973 1001 R+ rw 9017 8974 8973 1001 R+ rw 9016 8974 8973 1001 R+ rw 9015 8974 8973 1001 R+ rw 9014 8974 8973 1001 R+ rw 9012 8974 8973 1001 R+ rw 9011 8974 8973 1001 R+ CPU 2 rw 9008 8974 8973 1001 S+ wdrain 0xc0f6ad60 rw 9006 8974 8973 1001 S+ wdrain 0xc0f6ad60 rw 9005 8974 8973 1001 R+ rw 9003 8974 8973 1001 S+ wdrain 0xc0f6ad60 rw 9002 8974 8973 1001 R+ rw 9000 8974 8973 1001 R+ rw 8999 8974 8973 1001 R+ rw 8997 8974 8973 1001 R+ rw 8995 8974 8973 1001 S+ wdrain 0xc0f6ad60 rw 8993 8974 8973 1001 R+ rw 8992 8974 8973 1001 S+ wdrain 0xc0f6ad60 rw 8989 8974 8973 1001 S+ wdrain 0xc0f6ad60 rw 8988 8974 8973 1001 R+ rw 8985 8974 8973 1001 S+ wdrain 0xc0f6ad60 rw 8984 8974 8973 1001 S+ wdrain 0xc0f6ad60 rw 8983 8974 8973 1001 R+ rw 8982 8974 8973 1001 R+ rw 8981 8974 8973 1001 R+ rw 8980 8974 8973 1001 S+ wdrain 0xc0f6ad60 rw 8979 8974 8973 1001 R+ rw 8976 8974 8973 1001 R+ rw 8974 8973 8973 1001 S+ wait 0xc5178d48 rw 8973 1111 8973 1001 S+ wait 0xc4a4eaa0 rw 1193 1191 1191 1001 S piperd 0xc4b2c498 awk 1192 1191 1191 1001 S 
wait 0xc515a7f8 sh 1191 1190 1191 1001 Ss wait 0xc474baa0 sh 1190 1174 1174 1001 S select 0xc4a789e4 sshd 1189 1188 1189 1001 Ss kqread 0xc51c5180 tail 1188 1176 1176 1001 S select 0xc523a4a4 sshd 1181 1180 1181 1001 Rs+ CPU 1 top 1180 1175 1175 1001 R sshd 1176 997 1176 0 Ss sbwait 0xc4c62a60 sshd 1175 997 1175 0 Ss sbwait 0xc4ddc3f0 sshd 1174 997 1174 0 Ss sbwait 0xc4bf0728 sshd 1111 1110 1111 1001 Ss+ wait 0xc4c397f8 bash 1110 1108 1108 1001 S select 0xc4fe87a4 sshd 1108 997 1108 0 Ss sbwait 0xc4bf058c sshd 1107 1 1107 0 Ss+ ttyin 0xc483a070 getty 1106 1 1106 0 Ss+ ttyin 0xc483a270 getty 1105 1 1105 0 Ss+ ttyin 0xc468ee70 getty 1104 1 1104 0 Ss+ ttyin 0xc4820870 getty 1103 1 1103 0 Ss+ ttyin 0xc4550e70 getty 1102 1 1102 0 Ss+ ttyin 0xc468ec70 getty 1101 1 1101 0 Ss+ ttyin 0xc468e670 getty 1100 1 1100 0 Ss+ ttyin 0xc468ea70 getty 1078 1 1078 0 Ss select 0xc48366a4 inetd 1054 1 1054 0 Ss select 0xc4836624 moused 1036 1 1036 0 Ss nanslp 0xc0dfe6c4 watchdogd 1014 1 1014 0 Ss nanslp 0xc0dfe6c4 cron 1008 1 1008 25 Ss pause 0xc4c3aaf8 sendmail 1004 1 1004 0 Ss select 0xc4abe864 sendmail 997 1 997 0 Ss select 0xc4a44564 sshd 960 1 960 0 Ss select 0xc4a789a4 ntpd 859 858 858 0 S (threaded) nfsd 100118 S rpcsvc 0xc4abe690 nfsd: service 100117 S rpcsvc 0xc4a431d0 nfsd: service 100116 S rpcsvc 0xc4abe650 nfsd: service 100074 S rpcsvc 0xc4a78b10 nfsd: master 858 1 858 0 Ss select 0xc4a77124 nfsd 856 1 856 0 Ss select 0xc4a446a4 mountd 761 1 761 0 Ss select 0xc4a44a24 rpcbind 741 1 741 0 Ss select 0xc4abe5e4 syslogd 611 1 611 0 Ss select 0xc4abf7a4 devd 19 0 0 0 SL flowclea 0xc0f6b208 [flowcleaner] 18 0 0 0 SL sdflush 0xc0f76a00 [softdepflush] 17 0 0 0 SL syncer 0xc0f6b014 [syncer] 16 0 0 0 SL vlruwt 0xc4a59d48 [vnlru] 15 0 0 0 SL psleep 0xc0f6ad48 [bufdaemon] 9 0 0 0 SL pgzero 0xc0f77814 [pagezero] 8 0 0 0 SL psleep 0xc0f77444 [vmdaemon] 7 0 0 0 SL psleep 0xc0f7740c [pagedaemon] 6 0 0 0 SL - 0xc482043c [fdc0] 14 0 0 0 SL (threaded) [usb] 100034 D - 0xc479edac [usbus0] 100033 
D - 0xc479ed7c [usbus0] 100032 D - 0xc479ed4c [usbus0] 100031 D - 0xc479ed1c [usbus0] 5 0 0 0 SL ccb_scan 0xc0dca8d4 [xpt_thrd] 13 0 0 0 SL - 0xc0dfe524 [yarrow] 4 0 0 0 SL - 0xc0dfc2e4 [g_down] 3 0 0 0 RL CPU 3 [g_up] 2 0 0 0 SL - 0xc0dfc2d8 [g_event] 12 0 0 0 RL (threaded) [intr] 100042 I [irq7: ppc0] 100040 I [swi0: uart uart] 100039 I [irq12: psm0] 100038 I [irq1: atkbd0] 100037 I [irq15: ata1] 100036 RunQ [irq14: ata0] 100035 I [irq17: fxp0] 100030 I [irq16: uhci0] 100028 I [irq9: acpi0] 100027 I [swi5: +] 100022 I [swi2: cambio] 100020 I [swi6: task queue] 100019 I [swi6: Giant taskq] 100012 I [swi4: clock] 100011 I [swi4: clock] 100010 I [swi4: clock] 100009 I [swi4: clock] 100008 I [swi3: vm] 100007 I [swi1: netisr 0] 11 0 0 0 RL (threaded) [idle] 100006 Run CPU 0 [idle: cpu0] 100005 CanRun [idle: cpu1] 100004 CanRun [idle: cpu2] 100003 CanRun [idle: cpu3] 1 0 1 0 SLs wait 0xc457ad48 [init] 10 0 0 0 SL audit_wo 0xc0f76240 [audit] 0 0 0 0 SLs (threaded) [kernel] 100029 D - 0xc4782340 [em0 taskq] 100026 D - 0xc4749180 [kqueue taskq] 100025 D - 0xc47491c0 [acpi_task_2] 100024 D - 0xc47491c0 [acpi_task_1] 100023 D - 0xc47491c0 [acpi_task_0] 100018 D - 0xc4749600 [thread taskq] 100013 D - 0xc4561dc0 [firmware taskq] 100000 D sched 0xc0dfc3c0 [swapper] 8998 8974 8973 1001 Z+ rw 9023 8974 8973 1001 Z+ rw 8987 8974 8973 1001 Z+ rw 9001 8974 8973 1001 Z+ rw 8996 8974 8973 1001 Z+ rw 9019 8974 8973 1001 Z+ rw 8978 8974 8973 1001 Z+ rw 9010 8974 8973 1001 Z+ rw 8990 8974 8973 1001 Z+ rw 9018 8974 8973 1001 Z+ rw 8991 8974 8973 1001 Z+ rw 8986 8974 8973 1001 Z+ rw 8994 8974 8973 1001 Z+ rw 8977 8974 8973 1001 Z+ rw 9007 8974 8973 1001 Z+ rw 9013 8974 8973 1001 Z+ rw 9009 8974 8973 1001 Z+ rw 9022 8974 8973 1001 Z+ rw 9004 8974 8973 1001 Z+ rw 9024 8974 8973 1001 Z+ rw db:0:ps> allt Tracing command sleep pid 9337 tid 100188 td 0xc50f66c0 sched_switch(c50f66c0,0,104,191,97583a12,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,5c,...) 
at mi_switch+0x200 sleepq_switch(c50f66c0,0,c0ca4047,1a0,5c,...) at sleepq_switch+0x15f sleepq_catch_signals(2711,c08c8460,c50f66c0,2,100,...) at sleepq_catch_signals+0xb7 sleepq_timedwait_sig(c0dfe6c4,5c,c0ca0d98,100,0,...) at sleepq_timedwait_sig+0x1a _sleep(c0dfe6c4,0,15c,c0ca0d98,2711,...) at _sleep+0x31e kern_nanosleep(c50f66c0,e6f05c64,e6f05c6c,a,0,...) at kern_nanosleep+0xc1 nanosleep(c50f66c0,e6f05cf8,8,c50f66c0,c0d8d960,...) at nanosleep+0x6f syscall(e6f05d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (240, FreeBSD ELF32, nanosleep), eip = 0x2815bad7, esp = 0xbfbfeccc, ebp = 0xbfbfed08 --- Tracing command rw pid 9021 tid 100201 td 0xc68e7480 sched_switch(c68e7480,0,104,191,6aa2480e,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c68e7480,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_blkfree(c4a7a500,c4ad3800,c4b766cc,57ea0,0,...) at ffs_blkfree+0x337 handle_workitem_freefrag(c0f769ac,0,c0cc60e7,ffa,781,...) at handle_workitem_freefrag+0x62 setup_allocindir_phase2(c64ec280,f92,0,0,f92,...) at setup_allocindir_phase2+0x349 softdep_setup_allocindir_page(c53eb3a0,d861ea20,e6f61954,2,f92,...) at softdep_setup_allocindir_page+0x102 ffs_reallocblks(e6f61a38,c0cde46d,0,c53f7ae0,c59161ec,...) at ffs_reallocblks+0x142e VOP_REALLOCBLKS_APV(c0daeb60,e6f61a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c53f7ae0,d87071c0,3e60000,0,7f,...) at cluster_write+0x3c2 ffs_write(e6f61bc4,c0cde7d3,c53f7ae0,0,c53f7ae0,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6f61bc4,c53f7ae0,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4b2bc78,e6f61c58,c4c03c80,0,c68e7480,...) 
at vn_write+0x1ca dofilewrite(e6f61c58,ffffffff,ffffffff,0,c4b2bc78,...) at dofilewrite+0x95 kern_writev(c68e7480,4,e6f61c58,e6f61c78,1,...) at kern_writev+0x58 write(c68e7480,e6f61cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6f61d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9020 tid 100232 td 0xc49a8480 sched_switch(c49a8480,0,104,191,6aa8bf80,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c49a8480,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,d85676a0,e6fcb954,2,fd8,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e6fcba38,c0cde46d,0,c4ab0c3c,c559cc2c,...) at ffs_reallocblks+0x16d1 VOP_REALLOCBLKS_APV(c0daeb60,e6fcba38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c4ab0c3c,d877bce0,3f64000,0,7f,...) at cluster_write+0x3c2 ffs_write(e6fcbbc4,c0cde7d3,c4ab0c3c,0,c4ab0c3c,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6fcbbc4,c4ab0c3c,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4c1c0a8,e6fcbc58,c4c03c80,0,c49a8480,...) at vn_write+0x1ca dofilewrite(e6fcbc58,ffffffff,ffffffff,0,c4c1c0a8,...) at dofilewrite+0x95 kern_writev(c49a8480,4,e6fcbc58,e6fcbc78,1,...) at kern_writev+0x58 write(c49a8480,e6fcbcf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6fcbd38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9017 tid 100072 td 0xc4b46240 sched_switch(c4b46240,0,104,191,6aa3416c,...) 
at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c4b46240,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,d877f420,e6d2e954,2,fd2,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e6d2ea38,c0cde46d,0,c55ca15c,c4a4302c,...) at ffs_reallocblks+0x16d1 VOP_REALLOCBLKS_APV(c0daeb60,e6d2ea38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c55ca15c,d87b3de0,3f4c000,0,7f,...) at cluster_write+0x3c2 ffs_write(e6d2ebc4,c0cde7d3,c55ca15c,0,c55ca15c,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6d2ebc4,c55ca15c,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4b2b150,e6d2ec58,c4c03c80,0,c4b46240,...) at vn_write+0x1ca dofilewrite(e6d2ec58,ffffffff,ffffffff,0,c4b2b150,...) at dofilewrite+0x95 kern_writev(c4b46240,4,e6d2ec58,e6d2ec78,1,...) at kern_writev+0x58 write(c4b46240,e6d2ecf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6d2ed38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9016 tid 100160 td 0xc5192480 sched_switch(c5192480,0,104,191,6aa28948,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c5192480,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_clusteralloc(c5fa3bc8,3,57758,0,6,...) 
at ffs_clusteralloc+0x103 ffs_hashalloc(57758,0,6,6,c0aabde0,...) at ffs_hashalloc+0x85 ffs_reallocblks(e6e92a38,c0cde46d,0,c62b915c,c4a43168,...) at ffs_reallocblks+0x1189 VOP_REALLOCBLKS_APV(c0daeb60,e6e92a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c62b915c,d851e640,3fc0000,0,7f,...) at cluster_write+0x3c2 ffs_write(e6e92bc4,c0cde7d3,c62b915c,0,c62b915c,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6e92bc4,c62b915c,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4b01d90,e6e92c58,c4c03c80,0,c5192480,...) at vn_write+0x1ca dofilewrite(e6e92c58,ffffffff,ffffffff,0,c4b01d90,...) at dofilewrite+0x95 kern_writev(c5192480,4,e6e92c58,e6e92c78,1,...) at kern_writev+0x58 write(c5192480,e6e92cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6e92d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9015 tid 100319 td 0xc529a240 sched_switch(c529a240,0,104,191,6aa51e56,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c529a240,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,d85819e0,e7107954,2,f70,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e7107a38,c0cde46d,0,c5feb570,c51a4568,...) at ffs_reallocblks+0x12e2 VOP_REALLOCBLKS_APV(c0daeb60,e7107a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c5feb570,d87b5300,3dd4000,0,7f,...) at cluster_write+0x3c2 ffs_write(e7107bc4,c0cde7d3,c5feb570,0,c5feb570,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e7107bc4,c5feb570,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c6762d90,e7107c58,c4c03c80,0,c529a240,...) 
at vn_write+0x1ca dofilewrite(e7107c58,ffffffff,ffffffff,0,c6762d90,...) at dofilewrite+0x95 kern_writev(c529a240,4,e7107c58,e7107c78,1,...) at kern_writev+0x58 write(c529a240,e7107cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e7107d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9014 tid 100259 td 0xc5aa4240 sched_switch(c5aa4240,0,104,191,6aa82b48,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c5aa4240,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,d84f67a0,e7023954,2,fec,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e7023a38,c0cde46d,0,c4cafd98,c61f60a0,...) at ffs_reallocblks+0x16d1 VOP_REALLOCBLKS_APV(c0daeb60,e7023a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c4cafd98,d876e800,3fb4000,0,7f,...) at cluster_write+0x3c2 ffs_write(e7023bc4,c0cde7d3,c4cafd98,0,c4cafd98,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e7023bc4,c4cafd98,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4b30968,e7023c58,c4c03c80,0,c5aa4240,...) at vn_write+0x1ca dofilewrite(e7023c58,ffffffff,ffffffff,0,c4b30968,...) at dofilewrite+0x95 kern_writev(c5aa4240,4,e7023c58,e7023c78,1,...) at kern_writev+0x58 write(c5aa4240,e7023cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e7023d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9012 tid 100318 td 0xc529a480 sched_switch(c529a480,0,104,191,6aa3a3f6,...) 
at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c529a480,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_blkfree(c4a7a500,c4ad3800,c4b766cc,45b88,0,...) at ffs_blkfree+0x337 handle_workitem_freefrag(c0f769ac,0,c0cc60e7,ffa,781,...) at handle_workitem_freefrag+0x62 setup_allocindir_phase2(c568cb80,f9b,0,0,f9b,...) at setup_allocindir_phase2+0x349 softdep_setup_allocindir_page(c54262b8,d8694580,e7104954,2,f9b,...) at softdep_setup_allocindir_page+0x102 ffs_reallocblks(e7104a38,c0cde46d,0,c54e8570,c5ea4c6c,...) at ffs_reallocblks+0x142e VOP_REALLOCBLKS_APV(c0daeb60,e7104a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c54e8570,d878aa20,3e80000,0,7f,...) at cluster_write+0x3c2 ffs_write(e7104bc4,c0cde7d3,c54e8570,0,c54e8570,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e7104bc4,c54e8570,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4c1ce00,e7104c58,c4c03c80,0,c529a480,...) at vn_write+0x1ca dofilewrite(e7104c58,ffffffff,ffffffff,0,c4c1ce00,...) at dofilewrite+0x95 kern_writev(c529a480,4,e7104c58,e7104c78,1,...) at kern_writev+0x58 write(c529a480,e7104cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e7104d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9011 tid 100317 td 0xc529a6c0 cpustop_handler(4,e710168c,c0bcc5e6,e710161c,c087ecb4,...) at cpustop_handler+0x32 ipi_nmi_handler(e710161c,c087ecb4,c0dfe778,4,c4cb52a8,...) 
at ipi_nmi_handler+0x2f trap(e7101698) at trap+0x36 calltrap() at calltrap+0x6 --- trap 0x13, eip = 0xc087f299, esp = 0xe71016d8, ebp = 0xe71016f4 --- _mtx_lock_sleep(c0f769ac,c529a6c0,0,c0cc60e7,ea3,...) at _mtx_lock_sleep+0x99 _mtx_lock_flags(c0f769ac,0,c0cc60e7,ea3,0,...) at _mtx_lock_flags+0xf7 handle_workitem_freefrag(c0f769ac,0,c0cc60e7,ffa,781,...) at handle_workitem_freefrag+0x86 setup_allocindir_phase2(c5728380,fb2,0,0,fb2,...) at setup_allocindir_phase2+0x349 softdep_setup_allocindir_page(c5fa3828,d86977e0,e7101954,2,fb2,...) at softdep_setup_allocindir_page+0x102 ffs_reallocblks(e7101a38,c0cde46d,0,c5fe7000,c59dc464,...) at ffs_reallocblks+0x142e VOP_REALLOCBLKS_APV(c0daeb60,e7101a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c5fe7000,d8791d80,3ed8000,0,7f,...) at cluster_write+0x3c2 ffs_write(e7101bc4,c0cde7d3,c5fe7000,0,c5fe7000,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e7101bc4,c5fe7000,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4b01070,e7101c58,c4c03c80,0,c529a6c0,...) at vn_write+0x1ca dofilewrite(e7101c58,ffffffff,ffffffff,0,c4b01070,...) at dofilewrite+0x95 kern_writev(c529a6c0,4,e7101c58,e7101c78,1,...) at kern_writev+0x58 write(c529a6c0,e7101cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e7101d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9008 tid 100329 td 0xc4a7e6c0 sched_switch(c4a7e6c0,0,104,191,69e74f14,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c4a7e6c0,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0f6ad60,44,c0caacc0,0,0,...) at sleepq_wait+0x63 _sleep(c0f6ad60,c0f6ad64,44,c0caacc0,0,...) at _sleep+0x36b waitrunningbufspace(c5fe7668,d84e8c40,375,d875b000,c1dd9aa0,...) at waitrunningbufspace+0x6a bufwrite(d84e8c40,8,e71259cc,c090eac7,d84e8c40,...) at bufwrite+0x187 bawrite(d84e8c40,d84e8dc8,20,3af,0,...) 
at bawrite+0x5c cluster_wbuild(c5fe7570,4000,f7d,0,8,...) at cluster_wbuild+0x837 cluster_write(c5fe7570,d875b000,3df4000,0,7f,...) at cluster_write+0x696 ffs_write(e7125bc4,c0cde7d3,c5fe7570,0,c5fe7570,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e7125bc4,c5fe7570,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4b3b3f0,e7125c58,c4c03c80,0,c4a7e6c0,...) at vn_write+0x1ca dofilewrite(e7125c58,ffffffff,ffffffff,0,c4b3b3f0,...) at dofilewrite+0x95 kern_writev(c4a7e6c0,4,e7125c58,e7125c78,1,...) at kern_writev+0x58 write(c4a7e6c0,e7125cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e7125d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9006 tid 100166 td 0xc5191900 sched_switch(c5191900,0,104,191,676bba78,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c5191900,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0f6ad60,44,c0caacc0,0,0,...) at sleepq_wait+0x63 _sleep(c0f6ad60,c0f6ad64,44,c0caacc0,0,...) at _sleep+0x36b waitrunningbufspace(c54e47c4,d84e5840,375,d8761320,c1e1dc78,...) at waitrunningbufspace+0x6a bufwrite(d84e5840,7,e6eae9cc,c090eac7,d84e5840,...) at bufwrite+0x187 bawrite(d84e5840,d84e59b8,1c,3af,0,...) at bawrite+0x5c cluster_wbuild(c54e46cc,4000,fec,0,7,...) at cluster_wbuild+0x837 cluster_write(c54e46cc,d85010a0,3fb4000,0,7f,...) at cluster_write+0x203 ffs_write(e6eaebc4,c0cde7d3,c54e46cc,0,c54e46cc,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6eaebc4,c54e46cc,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4c1c850,e6eaec58,c4c03c80,0,c5191900,...) at vn_write+0x1ca dofilewrite(e6eaec58,ffffffff,ffffffff,0,c4c1c850,...) at dofilewrite+0x95 kern_writev(c5191900,4,e6eaec58,e6eaec78,1,...) at kern_writev+0x58 write(c5191900,e6eaecf8,c,c0c87c46,c0d8bf90,...) 
at write+0x4f syscall(e6eaed38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9005 tid 100055 td 0xc4afa900 sched_switch(c4afa900,0,104,191,6aa1f25e,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c4afa900,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_blkfree(c4a7a500,c4ad3800,c4b766cc,45d40,0,...) at ffs_blkfree+0x337 handle_workitem_freefrag(c0f769ac,0,c0cc60e7,ffa,781,...) at handle_workitem_freefrag+0x62 setup_allocindir_phase2(c6582080,f95,0,0,f95,...) at setup_allocindir_phase2+0x349 softdep_setup_allocindir_page(c5931ae0,d8710a80,e6cdd954,2,f95,...) at softdep_setup_allocindir_page+0x102 ffs_reallocblks(e6cdda38,c0cde46d,0,c4caf6cc,c5c86ae8,...) at ffs_reallocblks+0x142e VOP_REALLOCBLKS_APV(c0daeb60,e6cdda38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c4caf6cc,d855a1c0,3e60000,0,7f,...) at cluster_write+0x3c2 ffs_write(e6cddbc4,c0cde7d3,c4caf6cc,0,c4caf6cc,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6cddbc4,c4caf6cc,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c5089968,e6cddc58,c4c03c80,0,c4afa900,...) at vn_write+0x1ca dofilewrite(e6cddc58,ffffffff,ffffffff,0,c5089968,...) at dofilewrite+0x95 kern_writev(c4afa900,4,e6cddc58,e6cddc78,1,...) at kern_writev+0x58 write(c4afa900,e6cddcf8,c,c0c87c46,c0d8bf90,...) 
at write+0x4f syscall(e6cddd38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9003 tid 100099 td 0xc4c3b6c0 sched_switch(c4c3b6c0,0,104,191,66f89276,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c4c3b6c0,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0f6ad60,44,c0caacc0,0,0,...) at sleepq_wait+0x63 _sleep(c0f6ad60,c0f6ad64,44,c0caacc0,0,...) at _sleep+0x36b waitrunningbufspace(c55cabd8,d84e7240,375,d86ff300,c241e098,...) at waitrunningbufspace+0x6a bufwrite(d84e7240,7,e6d989cc,c090eac7,d84e7240,...) at bufwrite+0x187 bawrite(d84e7240,d84e73b8,1c,3af,0,...) at bawrite+0x5c cluster_wbuild(c55caae0,4000,fbb,0,7,...) at cluster_wbuild+0x837 cluster_write(c55caae0,d8781300,3ef0000,0,7f,...) at cluster_write+0x203 ffs_write(e6d98bc4,c0cde7d3,c55caae0,0,c55caae0,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6d98bc4,c55caae0,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4b300a8,e6d98c58,c4c03c80,0,c4c3b6c0,...) at vn_write+0x1ca dofilewrite(e6d98c58,ffffffff,ffffffff,0,c4b300a8,...) at dofilewrite+0x95 kern_writev(c4c3b6c0,4,e6d98c58,e6d98c78,1,...) at kern_writev+0x58 write(c4c3b6c0,e6d98cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6d98d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9002 tid 100085 td 0xc4bfe000 sched_switch(c4bfe000,0,104,191,6aa226dc,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c4bfe000,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) 
at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,d8628b00,e6d69954,2,fba,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e6d69a38,c0cde46d,0,c53cad98,c4c5d5ec,...) at ffs_reallocblks+0x16d1 VOP_REALLOCBLKS_APV(c0daeb60,e6d69a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c53cad98,d8731de0,3eec000,0,7f,...) at cluster_write+0x3c2 ffs_write(e6d69bc4,c0cde7d3,c53cad98,0,c53cad98,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6d69bc4,c53cad98,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4b2b3f0,e6d69c58,c4c03c80,0,c4bfe000,...) at vn_write+0x1ca dofilewrite(e6d69c58,ffffffff,ffffffff,0,c4b2b3f0,...) at dofilewrite+0x95 kern_writev(c4bfe000,4,e6d69c58,e6d69c78,1,...) at kern_writev+0x58 write(c4bfe000,e6d69cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6d69d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 9000 tid 100210 td 0xc4be7000 sched_switch(c4be7000,0,104,191,6aa2e0b8,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c4be7000,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,3,c4be7000,e6f84824,246,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e6f84a38,c0cde46d,0,c573c15c,c5c99128,...) at ffs_reallocblks+0xc69 VOP_REALLOCBLKS_APV(c0daeb60,e6f84a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c573c15c,d8727820,3ef4000,0,7f,...) at cluster_write+0x3c2 ffs_write(e6f84bc4,c0cde7d3,c573c15c,0,c573c15c,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6f84bc4,c573c15c,25f,0,...) 
at VOP_WRITE_APV+0x136 vn_write(c4e1b070,e6f84c58,c4c03c80,0,c4be7000,...) at vn_write+0x1ca dofilewrite(e6f84c58,ffffffff,ffffffff,0,c4e1b070,...) at dofilewrite+0x95 kern_writev(c4be7000,4,e6f84c58,e6f84c78,1,...) at kern_writev+0x58 write(c4be7000,e6f84cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6f84d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8999 tid 100061 td 0xc4b46b40 sched_switch(c4b46b40,0,104,191,6aa465d6,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c4b46b40,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,d85c5760,e6cf5954,2,fec,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e6cf5a38,c0cde46d,0,c5069414,c5be7220,...) at ffs_reallocblks+0x16d1 VOP_REALLOCBLKS_APV(c0daeb60,e6cf5a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c5069414,d87a8cc0,3fb4000,0,7f,...) at cluster_write+0x3c2 ffs_write(e6cf5bc4,c0cde7d3,c5069414,0,c5069414,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6cf5bc4,c5069414,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4c1c000,e6cf5c58,c4c03c80,0,c4b46b40,...) at vn_write+0x1ca dofilewrite(e6cf5c58,ffffffff,ffffffff,0,c4c1c000,...) at dofilewrite+0x95 kern_writev(c4b46b40,4,e6cf5c58,e6cf5c78,1,...) at kern_writev+0x58 write(c4b46b40,e6cf5cf8,c,c0c87c46,c0d8bf90,...) 
at write+0x4f syscall(e6cf5d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8997 tid 100198 td 0xc4b43000 sched_switch(c4b43000,0,104,191,6aa4b1b6,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c4b43000,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,d871e920,e6f55954,2,f82,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e6f55a38,c0cde46d,0,c4caf570,c60ffaac,...) at ffs_reallocblks+0x12e2 VOP_REALLOCBLKS_APV(c0daeb60,e6f55a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c4caf570,d8731760,3e24000,0,7f,...) at cluster_write+0x3c2 ffs_write(e6f55bc4,c0cde7d3,c4caf570,0,c4caf570,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6f55bc4,c4caf570,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4e1b310,e6f55c58,c4c03c80,0,c4b43000,...) at vn_write+0x1ca dofilewrite(e6f55c58,ffffffff,ffffffff,0,c4e1b310,...) at dofilewrite+0x95 kern_writev(c4b43000,4,e6f55c58,e6f55c78,1,...) at kern_writev+0x58 write(c4b43000,e6f55cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6f55d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8995 tid 100113 td 0xc4d1d240 sched_switch(c4d1d240,0,104,191,679caedc,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c4d1d240,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0f6ad60,44,c0caacc0,0,0,...) at sleepq_wait+0x63 _sleep(c0f6ad60,c0f6ad64,44,c0caacc0,0,...) 
at _sleep+0x36b waitrunningbufspace(c5dd77c4,d84e6bc0,375,d871c3c0,c1d656b8,...) at waitrunningbufspace+0x6a bufwrite(d84e6bc0,7,e6dfa9cc,c090eac7,d84e6bc0,...) at bufwrite+0x187 bawrite(d84e6bc0,d84e6d38,1c,3af,0,...) at bawrite+0x5c cluster_wbuild(c5dd76cc,4000,f84,0,7,...) at cluster_wbuild+0x837 cluster_write(c5dd76cc,d8793100,3e14000,0,7f,...) at cluster_write+0x203 ffs_write(e6dfabc4,c0cde7d3,c5dd76cc,0,c5dd76cc,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6dfabc4,c5dd76cc,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4c1c038,e6dfac58,c4c03c80,0,c4d1d240,...) at vn_write+0x1ca dofilewrite(e6dfac58,ffffffff,ffffffff,0,c4c1c038,...) at dofilewrite+0x95 kern_writev(c4d1d240,4,e6dfac58,e6dfac78,1,...) at kern_writev+0x58 write(c4d1d240,e6dfacf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6dfad38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8993 tid 100237 td 0xc529d240 sched_switch(c529d240,0,104,191,6aa64692,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c529d240,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,d8633f60,e6fda954,2,f93,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e6fdaa38,c0cde46d,0,c4caf828,c6f72164,...) at ffs_reallocblks+0x12e2 VOP_REALLOCBLKS_APV(c0daeb60,e6fdaa38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c4caf828,d85085a0,3e58000,0,7f,...) at cluster_write+0x3c2 ffs_write(e6fdabc4,c0cde7d3,c4caf828,0,c4caf828,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6fdabc4,c4caf828,25f,0,...) 
at VOP_WRITE_APV+0x136 vn_write(c4c1cd58,e6fdac58,c4c03c80,0,c529d240,...) at vn_write+0x1ca dofilewrite(e6fdac58,ffffffff,ffffffff,0,c4c1cd58,...) at dofilewrite+0x95 kern_writev(c529d240,4,e6fdac58,e6fdac78,1,...) at kern_writev+0x58 write(c529d240,e6fdacf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6fdad38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8992 tid 100309 td 0xc50cfb40 sched_switch(c50cfb40,0,104,191,6907be5e,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c50cfb40,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0f6ad60,44,c0caacc0,0,0,...) at sleepq_wait+0x63 _sleep(c0f6ad60,c0f6ad64,44,c0caacc0,0,...) at _sleep+0x36b waitrunningbufspace(c515e7c4,d84e5ec0,375,d87850c0,c1e16cd0,...) at waitrunningbufspace+0x6a bufwrite(d84e5ec0,8,e70b99cc,c090eac7,d84e5ec0,...) at bufwrite+0x187 bawrite(d84e5ec0,d84e6048,20,3af,0,...) at bawrite+0x5c cluster_wbuild(c515e6cc,4000,f5a,0,8,...) at cluster_wbuild+0x837 cluster_write(c515e6cc,d87850c0,3d68000,0,7f,...) at cluster_write+0x696 ffs_write(e70b9bc4,c0cde7d3,c515e6cc,0,c515e6cc,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e70b9bc4,c515e6cc,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c5089508,e70b9c58,c4c03c80,0,c50cfb40,...) at vn_write+0x1ca dofilewrite(e70b9c58,ffffffff,ffffffff,0,c5089508,...) at dofilewrite+0x95 kern_writev(c50cfb40,4,e70b9c58,e70b9c78,1,...) at kern_writev+0x58 write(c50cfb40,e70b9cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e70b9d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8989 tid 100084 td 0xc4bfe240 sched_switch(c4bfe240,0,104,191,6a7797c2,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) 
at mi_switch+0x200 sleepq_switch(c4bfe240,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0f6ad60,44,c0caacc0,0,0,...) at sleepq_wait+0x63 _sleep(c0f6ad60,c0f6ad64,44,c0caacc0,0,...) at _sleep+0x36b waitrunningbufspace(c5dd70f8,d84e6060,375,d8796500,c24ca910,...) at waitrunningbufspace+0x6a bufwrite(d84e6060,8,e6d659cc,c090eac7,d84e6060,...) at bufwrite+0x187 bawrite(d84e6060,d84e61e8,20,3af,0,...) at bawrite+0x5c cluster_wbuild(c5dd7000,4000,1004,0,8,...) at cluster_wbuild+0x837 cluster_write(c5dd7000,d8796500,4010000,0,7f,...) at cluster_write+0x696 ffs_write(e6d65bc4,c0cde7d3,c5dd7000,0,c5dd7000,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6d65bc4,c5dd7000,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4c1ccb0,e6d65c58,c4c03c80,0,c4bfe240,...) at vn_write+0x1ca dofilewrite(e6d65c58,ffffffff,ffffffff,0,c4c1ccb0,...) at dofilewrite+0x95 kern_writev(c4bfe240,4,e6d65c58,e6d65c78,1,...) at kern_writev+0x58 write(c4bfe240,e6d65cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6d65d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8988 tid 100323 td 0xc4a9a480 sched_switch(c4a9a480,0,104,191,6aa577a6,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c4a9a480,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,d86aab00,e7113954,2,fb3,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e7113a38,c0cde46d,0,c5e0715c,c59df564,...) at ffs_reallocblks+0x12e2 VOP_REALLOCBLKS_APV(c0daeb60,e7113a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c5e0715c,d8760620,3ee0000,0,7f,...) 
at cluster_write+0x3c2 ffs_write(e7113bc4,c0cde7d3,c5e0715c,0,c5e0715c,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e7113bc4,c5e0715c,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c52ccb60,e7113c58,c4c03c80,0,c4a9a480,...) at vn_write+0x1ca dofilewrite(e7113c58,ffffffff,ffffffff,0,c52ccb60,...) at dofilewrite+0x95 kern_writev(c4a9a480,4,e7113c58,e7113c78,1,...) at kern_writev+0x58 write(c4a9a480,e7113cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e7113d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8985 tid 100209 td 0xc4be7240 sched_switch(c4be7240,0,104,191,6777ffa4,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c4be7240,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0f6ad60,44,c0caacc0,0,0,...) at sleepq_wait+0x63 _sleep(c0f6ad60,c0f6ad64,44,c0caacc0,0,...) at _sleep+0x36b waitrunningbufspace(c54ba254,d84e5500,375,d87a2800,c1e98510,...) at waitrunningbufspace+0x6a bufwrite(d84e5500,8,e6f819cc,c090eac7,d84e5500,...) at bufwrite+0x187 bawrite(d84e5500,d84e5688,20,3af,0,...) at bawrite+0x5c cluster_wbuild(c54ba15c,4000,1007,0,8,...) at cluster_wbuild+0x837 cluster_write(c54ba15c,d87a2800,401c000,0,7f,...) at cluster_write+0x696 ffs_write(e6f81bc4,c0cde7d3,c54ba15c,0,c54ba15c,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6f81bc4,c54ba15c,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c5089268,e6f81c58,c4c03c80,0,c4be7240,...) at vn_write+0x1ca dofilewrite(e6f81c58,ffffffff,ffffffff,0,c5089268,...) at dofilewrite+0x95 kern_writev(c4be7240,4,e6f81c58,e6f81c78,1,...) at kern_writev+0x58 write(c4be7240,e6f81cf8,c,c0c87c46,c0d8bf90,...) 
at write+0x4f syscall(e6f81d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8984 tid 100262 td 0xc529db40 sched_switch(c529db40,0,104,191,67ad5482,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c529db40,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0f6ad60,44,c0caacc0,0,0,...) at sleepq_wait+0x63 _sleep(c0f6ad60,c0f6ad64,44,c0caacc0,0,...) at _sleep+0x36b waitrunningbufspace(c5dd7254,d84e6880,375,d85117e0,c1f05bd8,...) at waitrunningbufspace+0x6a bufwrite(d84e6880,7,e702c9cc,c090eac7,d84e6880,...) at bufwrite+0x187 bawrite(d84e6880,d84e69f8,1c,3af,0,...) at bawrite+0x5c cluster_wbuild(c5dd715c,4000,f72,0,7,...) at cluster_wbuild+0x837 cluster_write(c5dd715c,d8520ee0,3dcc000,0,7f,...) at cluster_write+0x203 ffs_write(e702cbc4,c0cde7d3,c5dd715c,0,c5dd715c,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e702cbc4,c5dd715c,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c52cc150,e702cc58,c4c03c80,0,c529db40,...) at vn_write+0x1ca dofilewrite(e702cc58,ffffffff,ffffffff,0,c52cc150,...) at dofilewrite+0x95 kern_writev(c529db40,4,e702cc58,e702cc78,1,...) at kern_writev+0x58 write(c529db40,e702ccf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e702cd38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8983 tid 100290 td 0xc50ccd80 sched_switch(c50ccd80,0,104,191,6aa5e01e,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c50ccd80,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) 
at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_alloccg(c59312b8,3,578b0,0,4000,...) at ffs_alloccg+0x10a ffs_hashalloc(578b0,0,4000,4000,c0aac330,...) at ffs_hashalloc+0x85 ffs_alloc(c59312b8,ff2,0,578b0,0,...) at ffs_alloc+0x2f2 ffs_balloc_ufs2(c4caf000,3fc8000,0,1000,c4c03c80,...) at ffs_balloc_ufs2+0x1a9c ffs_write(e7080bc4,c0cde7d3,c4caf000,0,c4caf000,...) at ffs_write+0x372 VOP_WRITE_APV(c0daeb60,e7080bc4,c4caf000,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4b001f8,e7080c58,c4c03c80,0,c50ccd80,...) at vn_write+0x1ca dofilewrite(e7080c58,ffffffff,ffffffff,0,c4b001f8,...) at dofilewrite+0x95 kern_writev(c50ccd80,4,e7080c58,e7080c78,1,...) at kern_writev+0x58 write(c50ccd80,e7080cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e7080d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8982 tid 100331 td 0xc4a7e240 sched_switch(c4a7e240,0,104,191,6aa42a86,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c4a7e240,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,d8636180,e712b954,2,fd8,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e712ba38,c0cde46d,0,c54ba000,c58d5e28,...) at ffs_reallocblks+0x12e2 VOP_REALLOCBLKS_APV(c0daeb60,e712ba38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c54ba000,d86f4ee0,3f6c000,0,7f,...) at cluster_write+0x3c2 ffs_write(e712bbc4,c0cde7d3,c54ba000,0,c54ba000,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e712bbc4,c54ba000,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c6762a10,e712bc58,c4c03c80,0,c4a7e240,...) 
at vn_write+0x1ca dofilewrite(e712bc58,ffffffff,ffffffff,0,c6762a10,...) at dofilewrite+0x95 kern_writev(c4a7e240,4,e712bc58,e712bc78,1,...) at kern_writev+0x58 write(c4a7e240,e712bcf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e712bd38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8981 tid 100067 td 0xc481bd80 sched_switch(c481bd80,0,104,191,6aa3ed62,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c481bd80,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_blkfree(c4a7a500,c4ad3800,c4b766cc,4a728,0,...) at ffs_blkfree+0x337 handle_workitem_freefrag(c0f769ac,0,c0cc60e7,ffa,781,...) at handle_workitem_freefrag+0x62 setup_allocindir_phase2(c5560380,fb7,0,0,fb7,...) at setup_allocindir_phase2+0x349 softdep_setup_allocindir_page(c5426e80,d8744f60,e6d16954,2,fb7,...) at softdep_setup_allocindir_page+0x102 ffs_reallocblks(e6d16a38,c0cde46d,0,c607a828,c635ad2c,...) at ffs_reallocblks+0x142e VOP_REALLOCBLKS_APV(c0daeb60,e6d16a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c607a828,d877cec0,3eec000,0,7f,...) at cluster_write+0x3c2 ffs_write(e6d16bc4,c0cde7d3,c607a828,0,c607a828,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6d16bc4,c607a828,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4b01038,e6d16c58,c4c03c80,0,c481bd80,...) at vn_write+0x1ca dofilewrite(e6d16c58,ffffffff,ffffffff,0,c4b01038,...) at dofilewrite+0x95 kern_writev(c481bd80,4,e6d16c58,e6d16c78,1,...) at kern_writev+0x58 write(c481bd80,e6d16cf8,c,c0c87c46,c0d8bf90,...) 
at write+0x4f syscall(e6d16d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8980 tid 100138 td 0xc4c3cb40 sched_switch(c4c3cb40,0,104,191,6a99a14c,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c4c3cb40,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0f6ad60,44,c0caacc0,0,0,...) at sleepq_wait+0x63 _sleep(c0f6ad60,c0f6ad64,44,c0caacc0,0,...) at _sleep+0x36b waitrunningbufspace(c5dd7920,d84e6540,375,d87a1140,c20bb080,...) at waitrunningbufspace+0x6a bufwrite(d84e6540,8,e6e4b9cc,c090eac7,d84e6540,...) at bufwrite+0x187 bawrite(d84e6540,d84e66c8,20,3af,0,...) at bawrite+0x5c cluster_wbuild(c5dd7828,4000,f96,0,8,...) at cluster_wbuild+0x837 cluster_write(c5dd7828,d87a1140,3e58000,0,7f,...) at cluster_write+0x696 ffs_write(e6e4bbc4,c0cde7d3,c5dd7828,0,c5dd7828,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e6e4bbc4,c5dd7828,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c6762038,e6e4bc58,c4c03c80,0,c4c3cb40,...) at vn_write+0x1ca dofilewrite(e6e4bc58,ffffffff,ffffffff,0,c6762038,...) at dofilewrite+0x95 kern_writev(c4c3cb40,4,e6e4bc58,e6e4bc78,1,...) at kern_writev+0x58 write(c4c3cb40,e6e4bcf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e6e4bd38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8979 tid 100281 td 0xc5aa5900 sched_switch(c5aa5900,0,104,191,6aa93cc4,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c5aa5900,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) 
at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_checkblk(4000,3,c5aa5900,e7065824,246,...) at ffs_checkblk+0x1d6 ffs_reallocblks(e7065a38,c0cde46d,0,c6611828,c5b73c28,...) at ffs_reallocblks+0xc69 VOP_REALLOCBLKS_APV(c0daeb60,e7065a38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c6611828,d8506d40,4008000,0,7f,...) at cluster_write+0x3c2 ffs_write(e7065bc4,c0cde7d3,c6611828,0,c6611828,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e7065bc4,c6611828,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c4c1c2a0,e7065c58,c4c03c80,0,c5aa5900,...) at vn_write+0x1ca dofilewrite(e7065c58,ffffffff,ffffffff,0,c4c1c2a0,...) at dofilewrite+0x95 kern_writev(c5aa5900,4,e7065c58,e7065c78,1,...) at kern_writev+0x58 write(c5aa5900,e7065cf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e7065d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8976 tid 100273 td 0xc47d5b40 sched_switch(c47d5b40,0,104,191,6aa7e84c,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c47d5b40,0,c0ca4047,260,50,...) at sleepq_switch+0x15f sleepq_wait(d8693260,50,c0cab6d3,4,0,...) at sleepq_wait+0x63 __lockmgr_args(d8693260,81900,c4b767c4,c0cab6d3,50,...) at __lockmgr_args+0xb3f getblk(c4b766cc,113b20,0,4000,0,...) at getblk+0x163 breadn(c4b766cc,113b20,0,4000,0,...) at breadn+0x44 bread(c4b766cc,113b20,0,4000,0,...) at bread+0x4c ffs_blkfree(c4a7a500,c4ad3800,c4b766cc,57d00,0,...) at ffs_blkfree+0x337 handle_workitem_freefrag(c0f769ac,0,c0cc60e7,ffa,781,...) at handle_workitem_freefrag+0x62 setup_allocindir_phase2(c5585700,f4c,0,0,f4c,...) at setup_allocindir_phase2+0x349 softdep_setup_allocindir_page(c5fa3000,d87af4c0,e704d954,2,f4c,...) at softdep_setup_allocindir_page+0x102 ffs_reallocblks(e704da38,c0cde46d,0,c62b9414,c60ffb6c,...) 
at ffs_reallocblks+0x142e VOP_REALLOCBLKS_APV(c0daeb60,e704da38,0,4000,0,...) at VOP_REALLOCBLKS_APV+0xd6 cluster_write(c62b9414,d8524140,3d38000,0,7f,...) at cluster_write+0x3c2 ffs_write(e704dbc4,c0cde7d3,c62b9414,0,c62b9414,...) at ffs_write+0x5eb VOP_WRITE_APV(c0daeb60,e704dbc4,c62b9414,25f,0,...) at VOP_WRITE_APV+0x136 vn_write(c6762b98,e704dc58,c4c03c80,0,c47d5b40,...) at vn_write+0x1ca dofilewrite(e704dc58,ffffffff,ffffffff,0,c6762b98,...) at dofilewrite+0x95 kern_writev(c47d5b40,4,e704dc58,e704dc78,1,...) at kern_writev+0x58 write(c47d5b40,e704dcf8,c,c0c87c46,c0d8bf90,...) at write+0x4f syscall(e704dd38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x2818e093, esp = 0xbfbfd91c, ebp = 0xbfbfe9c8 --- Tracing command rw pid 8974 tid 100152 td 0xc50fb900 sched_switch(c50fb900,0,104,191,6561853c,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,5c,...) at mi_switch+0x200 sleepq_switch(c50fb900,0,c0ca4047,1a0,5c,...) at sleepq_switch+0x15f sleepq_catch_signals(c0ca4047,160,0,100,100,...) at sleepq_catch_signals+0xb7 sleepq_wait_sig(c5178d48,5c,c0ca6869,100,0,...) at sleepq_wait_sig+0x17 _sleep(c5178d48,c5178dd0,15c,c0ca6869,0,...) at _sleep+0x354 kern_wait(c50fb900,2310,e6e75c74,0,0,...) at kern_wait+0xb76 wait4(c50fb900,e6e75cf8,10,c0ca6699,c0d8bfe4,...) at wait4+0x3b syscall(e6e75d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (7, FreeBSD ELF32, wait4), eip = 0x2810107b, esp = 0xbfbfe9cc, ebp = 0xbfbfe9e8 --- Tracing command rw pid 8973 tid 100227 td 0xc49a9000 sched_switch(c49a9000,0,104,191,fe1d1fe8,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,5c,...) at mi_switch+0x200 sleepq_switch(c49a9000,0,c0ca4047,1a0,5c,...) at sleepq_switch+0x15f sleepq_catch_signals(c0ca4047,160,0,100,100,...) at sleepq_catch_signals+0xb7 sleepq_wait_sig(c4a4eaa0,5c,c0ca6869,100,0,...) at sleepq_wait_sig+0x17 _sleep(c4a4eaa0,c4a4eb28,15c,c0ca6869,0,...) 
at _sleep+0x354 kern_wait(c49a9000,230e,e6fbcc74,0,0,...) at kern_wait+0xb76 wait4(c49a9000,e6fbccf8,10,c49a9000,c0d8bfe4,...) at wait4+0x3b syscall(e6fbcd38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (7, FreeBSD ELF32, wait4), eip = 0x2810107b, esp = 0xbfbfe9fc, ebp = 0xbfbfea18 --- Tracing command awk pid 1193 tid 100164 td 0xc5191d80 sched_switch(c5191d80,0,104,191,96980c62,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,4c,...) at mi_switch+0x200 sleepq_switch(c5191d80,0,c0ca4047,1a0,4c,...) at sleepq_switch+0x15f sleepq_catch_signals(c0ca4047,160,0,100,100,...) at sleepq_catch_signals+0xb7 sleepq_wait_sig(c4b2c498,4c,c0ca6480,100,0,...) at sleepq_wait_sig+0x17 _sleep(c4b2c498,c4b2c608,14c,c0ca6480,0,...) at _sleep+0x354 pipe_read(c4b3bb60,e6ea1c58,c4aa7e00,0,c5191d80,...) at pipe_read+0x417 dofileread(e6ea1c58,ffffffff,ffffffff,0,c4b3bb60,...) at dofileread+0x96 kern_readv(c5191d80,0,e6ea1c58,e6ea1c78,1,...) at kern_readv+0x58 read(c5191d80,e6ea1cf8,c,c5191d80,c0d8bf74,...) at read+0x4f syscall(e6ea1d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (3, FreeBSD ELF32, read), eip = 0x281b20b3, esp = 0xbfbfe97c, ebp = 0xbfbfe998 --- Tracing command sh pid 1192 tid 100163 td 0xc5192000 sched_switch(c5192000,0,104,191,96c554be,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,5c,...) at mi_switch+0x200 sleepq_switch(c5192000,0,c0ca4047,1a0,5c,...) at sleepq_switch+0x15f sleepq_catch_signals(c0ca4047,160,0,100,100,...) at sleepq_catch_signals+0xb7 sleepq_wait_sig(c515a7f8,5c,c0ca6869,100,0,...) at sleepq_wait_sig+0x17 _sleep(c515a7f8,c515a880,15c,c0ca6869,0,...) at _sleep+0x354 kern_wait(c5192000,ffffffff,e6e9dc74,2,0,...) at kern_wait+0xb76 wait4(c5192000,e6e9dcf8,10,c0ca6848,c0d8bfe4,...) 
at wait4+0x3b syscall(e6e9dd38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (7, FreeBSD ELF32, wait4), eip = 0x2815e07b, esp = 0xbfbfe95c, ebp = 0xbfbfe978 --- Tracing command sh pid 1191 tid 100060 td 0xc4b1a6c0 sched_switch(c4b1a6c0,0,104,191,4221f506,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,5c,...) at mi_switch+0x200 sleepq_switch(c4b1a6c0,0,c0ca4047,1a0,5c,...) at sleepq_switch+0x15f sleepq_catch_signals(c0ca4047,160,0,100,100,...) at sleepq_catch_signals+0xb7 sleepq_wait_sig(c474baa0,5c,c0ca6869,100,0,...) at sleepq_wait_sig+0x17 _sleep(c474baa0,c474bb28,15c,c0ca6869,0,...) at _sleep+0x354 kern_wait(c4b1a6c0,ffffffff,e6cf1c74,2,0,...) at kern_wait+0xb76 wait4(c4b1a6c0,e6cf1cf8,10,c4b1a6c0,c0d8bfe4,...) at wait4+0x3b syscall(e6cf1d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (7, FreeBSD ELF32, wait4), eip = 0x2815e07b, esp = 0xbfbfeb6c, ebp = 0xbfbfeb88 --- Tracing command sshd pid 1190 tid 100105 td 0xc4c01b40 Tracing command tail pid 1189 tid 100162 td 0xc5192240 sched_switch(c5192240,0,104,191,5715a83a,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,58,...) at mi_switch+0x200 sleepq_switch(c5192240,0,c0ca4047,1a0,58,...) at sleepq_switch+0x15f sleepq_catch_signals(3e9,c08c8460,c5192240,2,100,...) at sleepq_catch_signals+0xb7 sleepq_timedwait_sig(c51c5180,58,c0c9ac12,100,0,...) at sleepq_timedwait_sig+0x1a _sleep(c51c5180,c51c5180,158,c0c9ac12,3e9,...) at _sleep+0x31e kern_kevent(c5192240,4,0,1,e6e99c58,...) at kern_kevent+0x364 kevent(c5192240,e6e99cf8,18,c0c3ee49,c0d8e6d4,...) at kevent+0x19b syscall(e6e99d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (363, FreeBSD ELF32, kevent), eip = 0x2815fcbb, esp = 0xbfbfeb5c, ebp = 0xbfbfec18 --- Tracing command sshd pid 1188 tid 100139 td 0xc51746c0 Tracing command top pid 1181 tid 100065 td 0xc4b46900 cpustop_handler(2,e6d08a54,c0bcc5e6,c084bb4c,c0e02b40,...) 
at cpustop_handler+0x32 ipi_nmi_handler(c084bb4c,c0e02b40,0,c0c996ca,c4b40d48,...) at ipi_nmi_handler+0x2f trap(e6d08a60) at trap+0x36 calltrap() at calltrap+0x6 --- trap 0x13, eip = 0xc084b963, esp = 0xe6d08aa0, ebp = 0xe6d08ab4 --- tvtohz(e6d08c28,e6d08c30,c52b2180,c4b46900,315b1b38,...) at tvtohz+0x73 kern_select(c4b46900,2,bfbfebec,0,0,e6d08c70,20,1,0) at kern_select+0x4e0 select(c4b46900,e6d08cf8,14,c0c87c46,c0d8c94c,...) at select+0x66 syscall(e6d08d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (93, FreeBSD ELF32, select), eip = 0x281e8033, esp = 0xbfbfeb8c, ebp = 0xbfbfece8 --- Tracing command sshd pid 1180 tid 100083 td 0xc4bfe480 sched_switch(c4bfe480,0,104,191,ff012e10,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4bfe480,0,c0ca4047,1a0,0,...) at sleepq_switch+0x15f sleepq_catch_signals(c087f06a,c51a4750,0,c0c9e273,c4bfe480,...) at sleepq_catch_signals+0xb7 sleepq_wait_sig(c51a4764,0,e6d61a7c,101,0,...) at sleepq_wait_sig+0x17 _cv_wait_sig(c51a4764,c51a4750,c0ca62cf,603,c4b2b7a8,...) at _cv_wait_sig+0x240 seltdwait(c4b2b7a8,58,c51abb00,c4bfe480,200246,...) at seltdwait+0xa2 kern_select(c4bfe480,a,286030b8,286030dc,0,0,20,0,28100c70) at kern_select+0x4f4 select(c4bfe480,e6d61cf8,14,c0c87c46,c0d8c94c,...) at select+0x66 syscall(e6d61d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (93, FreeBSD ELF32, select), eip = 0x283ce033, esp = 0xbfbfde5c, ebp = 0xbfbfdea8 --- Tracing command sshd pid 1176 tid 100056 td 0xc4afa6c0 Tracing command sshd pid 1175 tid 100057 td 0xc4b47000 Tracing command sshd pid 1174 tid 100058 td 0xc4b46d80 Tracing command bash pid 1111 tid 100094 td 0xc4c3c240 sched_switch(c4c3c240,0,104,191,9ef02d02,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,5c,...) at mi_switch+0x200 sleepq_switch(c4c3c240,0,c0ca4047,1a0,5c,...) at sleepq_switch+0x15f sleepq_catch_signals(c0ca4047,160,0,100,100,...) 
at sleepq_catch_signals+0xb7 sleepq_wait_sig(c4c397f8,5c,c0ca6869,100,0,...) at sleepq_wait_sig+0x17 _sleep(c4c397f8,c4c39880,15c,c0ca6869,0,...) at _sleep+0x354 kern_wait(c4c3c240,ffffffff,e6d89c74,6,0,...) at kern_wait+0xb76 wait4(c4c3c240,e6d89cf8,10,c0ca6758,c0d8bfe4,...) at wait4+0x3b syscall(e6d89d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (7, FreeBSD ELF32, wait4), eip = 0x282be07b, esp = 0xbfbfe77c, ebp = 0xbfbfe798 --- Tracing command sshd pid 1110 tid 100106 td 0xc4c01900 sched_switch(c4c01900,0,104,191,a0c68900,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4c01900,0,c0ca4047,1a0,0,...) at sleepq_switch+0x15f sleepq_catch_signals(c087f06a,c4fe8790,0,c0c9e273,c4c01900,...) at sleepq_catch_signals+0xb7 sleepq_wait_sig(c4fe87a4,0,e6de5a7c,101,0,...) at sleepq_wait_sig+0x17 _cv_wait_sig(c4fe87a4,c4fe8790,c0ca62cf,603,c4b007a8,...) at _cv_wait_sig+0x240 seltdwait(c4b007a8,58,c4c04800,c4c01900,200246,...) at seltdwait+0xa2 kern_select(c4c01900,a,286030b8,286030dc,0,0,20,0,28100c70) at kern_select+0x4f4 select(c4c01900,e6de5cf8,14,c0c87c46,c0d8c94c,...) 
at select+0x66 syscall(e6de5d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (93, FreeBSD ELF32, select), eip = 0x283ce033, esp = 0xbfbfde5c, ebp = 0xbfbfdea8 --- Tracing command sshd pid 1108 tid 100088 td 0xc481b6c0 Tracing command getty pid 1107 tid 100124 td 0xc4b186c0 Tracing command getty pid 1106 tid 100123 td 0xc4b18900 Tracing command getty pid 1105 tid 100122 td 0xc4b18b40 Tracing command getty pid 1104 tid 100121 td 0xc4b18d80 Tracing command getty pid 1103 tid 100120 td 0xc4b1a000 Tracing command getty pid 1102 tid 100119 td 0xc4b1a240 Tracing command getty pid 1101 tid 100091 td 0xc4c3c900 Tracing command getty pid 1100 tid 100059 td 0xc4b1a900 Tracing command inetd pid 1078 tid 100064 td 0xc4afa240 Tracing command moused pid 1054 tid 100068 td 0xc481bb40 Tracing command watchdogd pid 1036 tid 100111 td 0xc4d1d6c0 sched_switch(c4d1d6c0,0,104,191,36a6a13c,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,5c,...) at mi_switch+0x200 sleepq_switch(c4d1d6c0,0,c0ca4047,1a0,5c,...) at sleepq_switch+0x15f sleepq_catch_signals(ea61,c08c8460,c4d1d6c0,3,100,...) at sleepq_catch_signals+0xb7 sleepq_timedwait_sig(c0dfe6c4,5c,c0ca0d98,100,0,...) at sleepq_timedwait_sig+0x1a _sleep(c0dfe6c4,0,15c,c0ca0d98,ea61,...) at _sleep+0x31e kern_nanosleep(c4d1d6c0,e6df4c64,e6df4c6c,3c,0,...) at kern_nanosleep+0xc1 nanosleep(c4d1d6c0,e6df4cf8,8,c0cb7cc3,c0d8d960,...) at nanosleep+0x6f syscall(e6df4d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (240, FreeBSD ELF32, nanosleep), eip = 0x28185ad7, esp = 0xbfbfecec, ebp = 0xbfbfed18 --- Tracing command cron pid 1014 tid 100095 td 0xc4c3c000 sched_switch(c4c3c000,0,104,191,a71768c6,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,5c,...) at mi_switch+0x200 sleepq_switch(c4c3c000,0,c0ca4047,1a0,5c,...) at sleepq_switch+0x15f sleepq_catch_signals(ea61,c08c8460,c4c3c000,3,100,...) at sleepq_catch_signals+0xb7 sleepq_timedwait_sig(c0dfe6c4,5c,c0ca0d98,100,0,...) 
at sleepq_timedwait_sig+0x1a _sleep(c0dfe6c4,0,15c,c0ca0d98,ea61,...) at _sleep+0x31e kern_nanosleep(c4c3c000,e6d8cc64,e6d8cc6c,3c,0,...) at kern_nanosleep+0xc1 nanosleep(c4c3c000,e6d8ccf8,8,c0ca69a6,c0d8d960,...) at nanosleep+0x6f syscall(e6d8cd38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (240, FreeBSD ELF32, nanosleep), eip = 0x28178ad7, esp = 0xbfbfec8c, ebp = 0xbfbfecb8 --- Tracing command sendmail pid 1008 tid 100115 td 0xc4d1cd80 sched_switch(c4d1cd80,0,104,191,72b572a,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,68,...) at mi_switch+0x200 sleepq_switch(c4d1cd80,0,c0ca4047,1a0,68,...) at sleepq_switch+0x15f sleepq_catch_signals(c0ca4047,160,0,100,100,...) at sleepq_catch_signals+0xb7 sleepq_wait_sig(c4c3aaf8,68,c0c5373e,100,0,...) at sleepq_wait_sig+0x17 _sleep(c4c3aaf8,c4c3ab28,168,c0c5373e,0,...) at _sleep+0x354 kern_sigsuspend(c4d1cd80,0,0,0,0,...) at kern_sigsuspend+0xae sigsuspend(c4d1cd80,e6e00cf8,4,c0ca6758,c0d8e46c,...) at sigsuspend+0x4d syscall(e6e00d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (4, FreeBSD ELF32, write), eip = 0x28332efb, esp = 0xbfbfcf8c, ebp = 0xbfbfcfb8 --- Tracing command sendmail pid 1004 tid 100071 td 0xc4b46480 sched_switch(c4b46480,0,104,191,b77b2ec6,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4b46480,0,c0ca4047,1a0,0,...) at sleepq_switch+0x15f sleepq_catch_signals(e6d2aa4c,c087f06a,c4abe850,0,c4b46480,...) at sleepq_catch_signals+0xb7 sleepq_timedwait_sig(c4abe864,0,e6d2aa7c,101,0,...) at sleepq_timedwait_sig+0x1a _cv_timedwait_sig(c4abe864,c4abe850,1389,603,c4b00700,...) at _cv_timedwait_sig+0x250 seltdwait(e6d2ac28,e6d2ac30,c4c04100,c4b46480,c188b014,...) at seltdwait+0x8a kern_select(c4b46480,5,bfbfc510,0,0,e6d2ac70,20,5,0) at kern_select+0x4f4 select(c4b46480,e6d2acf8,14,c0ca6c7c,c0d8c94c,...) 
at select+0x66 syscall(e6d2ad38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (93, FreeBSD ELF32, select), eip = 0x283d7033, esp = 0xbfbfc47c, ebp = 0xbfbfcfa8 --- Tracing command sshd pid 997 tid 100090 td 0xc481b240 Tracing command ntpd pid 960 tid 100093 td 0xc4c3c480 sched_switch(c4c3c480,0,104,191,200ceb70,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4c3c480,0,c0ca4047,1a0,0,...) at sleepq_switch+0x15f sleepq_catch_signals(c087f06a,c4a78990,0,c0c9e273,c4c3c480,...) at sleepq_catch_signals+0xb7 sleepq_wait_sig(c4a789a4,0,e6d86a7c,101,0,...) at sleepq_wait_sig+0x17 _cv_wait_sig(c4a789a4,c4a78990,c0ca62cf,603,c4b2bb28,...) at _cv_wait_sig+0x240 seltdwait(c4b2bb28,58,c4578100,c4c3c480,0,...) at seltdwait+0xa2 kern_select(c4c3c480,1c,bfbfed28,0,0,0,20,e6d86c98,246) at kern_select+0x4f4 select(c4c3c480,e6d86cf8,14,c4c3c480,c0d8c94c,...) at select+0x66 syscall(e6d86d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (93, FreeBSD ELF32, select), eip = 0x28353033, esp = 0xbfbfecfc, ebp = 0xbfbfedc8 --- Tracing command nfsd pid 859 tid 100118 td 0xc4b43240 sched_switch(c4b43240,0,104,191,b27f6876,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4b43240,0,c0ca4047,1a0,0,...) at sleepq_switch+0x15f sleepq_catch_signals(e6e0bbf8,c087f06a,c48ff100,0,c4b43240,...) at sleepq_catch_signals+0xb7 sleepq_timedwait_sig(c4abe690,0,e6e0bc28,101,0,...) at sleepq_timedwait_sig+0x1a _cv_timedwait_sig(c4abe690,c48ff100,1388,3af,5a5a5a5a,...) at _cv_timedwait_sig+0x250 svc_run_internal(e6e0bd24,c0864bf8,c48ff100,e6e0bd38,c0c9afc8,...) at svc_run_internal+0x356 svc_thread_start(c48ff100,e6e0bd38,c0c9afc8,343,c4b40000,...) 
at svc_thread_start+0x10 fork_exit(c0a848c0,c48ff100,e6e0bd38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0x2e, eip = 0xc, esp = 0x33, ebp = 0 --- Tracing command nfsd pid 859 tid 100117 td 0xc4b43480 sched_switch(c4b43480,0,104,191,b51c3740,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4b43480,0,c0ca4047,1a0,0,...) at sleepq_switch+0x15f sleepq_catch_signals(e6e08bf8,c087f06a,c48ff100,0,c4b43480,...) at sleepq_catch_signals+0xb7 sleepq_timedwait_sig(c4a431d0,0,e6e08c28,101,0,...) at sleepq_timedwait_sig+0x1a _cv_timedwait_sig(c4a431d0,c48ff100,1388,3af,5a5a5a5a,...) at _cv_timedwait_sig+0x250 svc_run_internal(e6e08d24,c0864bf8,c48ff100,e6e08d38,c0c9afc8,...) at svc_run_internal+0x356 svc_thread_start(c48ff100,e6e08d38,c0c9afc8,343,c4b40000,...) at svc_thread_start+0x10 fork_exit(c0a848c0,c48ff100,e6e08d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0x2e, eip = 0xc, esp = 0x33, ebp = 0 --- Tracing command nfsd pid 859 tid 100116 td 0xc4b436c0 sched_switch(c4b436c0,0,104,191,b264d39c,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4b436c0,0,c0ca4047,1a0,0,...) at sleepq_switch+0x15f sleepq_catch_signals(e6e05bf8,c087f06a,c48ff100,0,c4b436c0,...) at sleepq_catch_signals+0xb7 sleepq_timedwait_sig(c4abe650,0,e6e05c28,101,0,...) at sleepq_timedwait_sig+0x1a _cv_timedwait_sig(c4abe650,c48ff100,1388,3af,5a5a5a5a,...) at _cv_timedwait_sig+0x250 svc_run_internal(e6e05d24,c0864bf8,c48ff100,e6e05d38,c0c9afc8,...) at svc_run_internal+0x356 svc_thread_start(c48ff100,e6e05d38,c0c9afc8,343,c4b40000,...) at svc_thread_start+0x10 fork_exit(c0a848c0,c48ff100,e6e05d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0x2e, eip = 0xc, esp = 0x33, ebp = 0 --- Tracing command nfsd pid 859 tid 100074 td 0xc4b43d80 sched_switch(c4b43d80,0,104,191,b50bbeaa,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) 
at mi_switch+0x200 sleepq_switch(c4b43d80,0,c0ca4047,1a0,0,...) at sleepq_switch+0x15f sleepq_catch_signals(e6d36ae8,c087f06a,c48ff100,0,c4b43d80,...) at sleepq_catch_signals+0xb7 sleepq_timedwait_sig(c4a78b10,0,e6d36b18,101,0,...) at sleepq_timedwait_sig+0x1a _cv_timedwait_sig(c4a78b10,c48ff100,1388,3af,e6d36b60,...) at _cv_timedwait_sig+0x250 svc_run_internal(c4b43ee4,14,c0cc3330,c0cc1de4,e6d36c3c,...) at svc_run_internal+0x356 svc_run(c48ff100,0,c0cc20e8,1fd,0,...) at svc_run+0x7f nfssvc_nfsd(bfbfe8b0,e6d36c3c,c,c4578100,e6d36c50,...) at nfssvc_nfsd+0xad nfssvc_nfsserver(c4b43d80,e6d36cf8,bfbfe8b0,c4b43d80,c4b40000,...) at nfssvc_nfsserver+0x24f nfssvc(c4b43d80,e6d36cf8,8,c0ca6c7c,c0d8d014,...) at nfssvc+0x83 syscall(e6d36d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (155, FreeBSD ELF32, nfssvc), eip = 0x280daadb, esp = 0xbfbfe86c, ebp = 0xbfbfead8 --- Tracing command nfsd pid 858 tid 100097 td 0xc4c3bb40 Tracing command mountd pid 856 tid 100089 td 0xc481b480 Tracing command rpcbind pid 761 tid 100114 td 0xc4d1d000 sched_switch(c4d1d000,0,104,191,4baa835c,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4d1d000,0,c0ca4047,1a0,0,...) at sleepq_switch+0x15f sleepq_catch_signals(e6dfdaa8,c087f06a,c4a44a10,0,c4d1d000,...) at sleepq_catch_signals+0xb7 sleepq_timedwait_sig(c4a44a24,0,e6dfdad8,101,0,...) at sleepq_timedwait_sig+0x1a _cv_timedwait_sig(c4a44a24,c4a44a10,7531,603,e6dfdb8c,...) at _cv_timedwait_sig+0x250 seltdwait(e6dfdc5c,e6dfdc64,511,c4d1d000,e6dfdb5c,...) at seltdwait+0x8a poll(c4d1d000,e6dfdcf8,c,c0ca69a6,c0d8d5fc,...) at poll+0x300 syscall(e6dfdd38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (209, FreeBSD ELF32, poll), eip = 0x2813f01f, esp = 0xbfbfcc0c, ebp = 0xbfbfedd8 --- Tracing command syslogd pid 741 tid 100103 td 0xc4c3b000 sched_switch(c4c3b000,0,104,191,4c194562,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) 
at mi_switch+0x200 sleepq_switch(c4c3b000,0,c0ca4047,1a0,0,...) at sleepq_switch+0x15f sleepq_catch_signals(c087f06a,c4abe5d0,0,c0c9e273,c4c3b000,...) at sleepq_catch_signals+0xb7 sleepq_wait_sig(c4abe5e4,0,e6ddca7c,101,0,...) at sleepq_wait_sig+0x17 _cv_wait_sig(c4abe5e4,c4abe5d0,c0ca62cf,603,c4b300e0,...) at _cv_wait_sig+0x240 seltdwait(c4b300e0,58,c4578100,c4c3b000,0,...) at seltdwait+0xa2 kern_select(c4c3b000,9,282290ac,0,0,0,20,0,281a5498) at kern_select+0x4f4 select(c4c3b000,e6ddccf8,14,c0ca6858,c0d8c94c,...) at select+0x66 syscall(e6ddcd38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (93, FreeBSD ELF32, select), eip = 0x28191033, esp = 0xbfbfe2ac, ebp = 0xbfbfee18 --- Tracing command devd pid 611 tid 100098 td 0xc4c3b900 Tracing command flowcleaner pid 19 tid 100050 td 0xc47676c0 sched_switch(c47676c0,0,104,191,78fcce0c,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c47676c0,0,c0ca4047,283,c47676c0,...) at sleepq_switch+0x15f sleepq_timedwait(c0f6b208,0,e4addcc4,1,0,...) at sleepq_timedwait+0x6b _cv_timedwait(c0f6b208,c0f6b210,2710,3f0,0,...) at _cv_timedwait+0x250 flowtable_cleaner(0,e4addd38,c0c9afc8,343,c4a59550,...) at flowtable_cleaner+0x1bf fork_exit(c0937cc0,0,e4addd38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4addd70, ebp = 0 --- Tracing command softdepflush pid 18 tid 100049 td 0xc4767900 sched_switch(c4767900,0,104,191,448f4fda,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c4767900,0,c0ca4047,283,0,...) at sleepq_switch+0x15f sleepq_timedwait(c0f76a00,44,c0cc80a6,0,0,...) at sleepq_timedwait+0x6b _sleep(c0f76a00,c0f769ac,44,c0cc80a6,3e8,...) at _sleep+0x339 softdep_flush(0,e4adad38,c0c9afc8,343,c4a597f8,...) 
at softdep_flush+0x244 fork_exit(c0acbfc0,0,e4adad38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4adad70, ebp = 0 --- Tracing command syncer pid 17 tid 100048 td 0xc4767b40 sched_switch(c4767b40,0,104,191,66272cca,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4767b40,0,c0ca4047,283,c4767b40,...) at sleepq_switch+0x15f sleepq_timedwait(c0f6b014,0,e4ad7c88,1,0,...) at sleepq_timedwait+0x6b _cv_timedwait(c0f6b014,c0f6b000,3e8,6cc,4e20,...) at _cv_timedwait+0x250 sched_sync(0,e4ad7d38,c0c9afc8,343,c4a59aa0,...) at sched_sync+0x502 fork_exit(c0922990,0,e4ad7d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4ad7d70, ebp = 0 --- Tracing command vnlru pid 16 tid 100047 td 0xc4767d80 sched_switch(c4767d80,0,104,191,666332d0,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,50,...) at mi_switch+0x200 sleepq_switch(c4767d80,0,c0ca4047,283,0,...) at sleepq_switch+0x15f sleepq_timedwait(c4a59d48,50,c0cae07e,0,0,...) at sleepq_timedwait+0x6b _sleep(c4a59d48,c0f6afd4,250,c0cae07e,3e8,...) at _sleep+0x339 vnlru_proc(0,e4ad4d38,c0c9afc8,343,c4a59d48,...) at vnlru_proc+0xe7 fork_exit(c0923560,0,e4ad4d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4ad4d70, ebp = 0 --- Tracing command bufdaemon pid 15 tid 100046 td 0xc481a000 sched_switch(c481a000,0,104,191,2e00f3c0,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c481a000,0,c0ca4047,283,0,...) at sleepq_switch+0x15f sleepq_timedwait(c0f6ad48,44,c0cab5ec,0,0,...) at sleepq_timedwait+0x6b _sleep(c0f6ad48,c0f6ad4c,44,c0cab5ec,3e8,...) at _sleep+0x339 buf_daemon(0,e4ad1d38,c0c9afc8,343,c457b2a8,...) 
at buf_daemon+0x138 fork_exit(c090aba0,0,e4ad1d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4ad1d70, ebp = 0 --- Tracing command pagezero pid 9 tid 100045 td 0xc481a240 sched_switch(c481a240,0,104,191,74f521c0,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c481a240,0,c0ca4047,283,0,...) at sleepq_switch+0x15f sleepq_timedwait(c0f77814,0,c0ccdc39,0,0,...) at sleepq_timedwait+0x6b _sleep(c0f77814,c0f77300,0,c0ccdc39,493e0,...) at _sleep+0x339 vm_pagezero(0,e4aced38,c0c9afc8,343,c457b550,...) at vm_pagezero+0xdc fork_exit(c0b0a630,0,e4aced38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4aced70, ebp = 0 --- Tracing command vmdaemon pid 8 tid 100044 td 0xc481a480 sched_switch(c481a480,0,104,191,e4e29db4,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,68,...) at mi_switch+0x200 sleepq_switch(c481a480,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0f77444,68,c0cab5ec,0,0,...) at sleepq_wait+0x63 _sleep(c0f77444,c0f77448,68,c0cab5ec,0,...) at _sleep+0x36b vm_daemon(0,e4acbd38,c0c9afc8,343,c457b7f8,...) at vm_daemon+0x59 fork_exit(c0b04ad0,0,e4acbd38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4acbd70, ebp = 0 --- Tracing command pagedaemon pid 7 tid 100043 td 0xc481a6c0 sched_switch(c481a6c0,0,104,191,18770372,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c481a6c0,0,c0ca4047,283,0,...) at sleepq_switch+0x15f sleepq_timedwait(c0f7740c,44,c0cab5ec,0,0,...) at sleepq_timedwait+0x6b _sleep(c0f7740c,c0f77300,44,c0cab5ec,1388,...) at _sleep+0x339 vm_pageout(0,e4ac8d38,c0c9afc8,343,c457baa0,...) 
at vm_pageout+0x2bb fork_exit(c0b05970,0,e4ac8d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4ac8d70, ebp = 0 --- Tracing command fdc0 pid 6 tid 100041 td 0xc481ab40 sched_switch(c481ab40,0,104,191,66d30a7a,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,4c,...) at mi_switch+0x200 sleepq_switch(c481ab40,0,c0ca4047,283,0,...) at sleepq_switch+0x15f sleepq_timedwait(c482043c,4c,c0c9544e,0,0,...) at sleepq_timedwait+0x6b _sleep(c482043c,c48204f0,4c,c0c9544e,3e8,...) at _sleep+0x339 fdc_thread(c4820400,e4ac2d38,c0c9afc8,343,c457bd48,...) at fdc_thread+0x27d fork_exit(c0b87c50,c4820400,e4ac2d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4ac2d70, ebp = 0 --- Tracing command usb pid 14 tid 100034 td 0xc47656c0 sched_switch(c47656c0,0,104,191,b9263714,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c47656c0,0,c0ca4047,260,c47656c0,...) at sleepq_switch+0x15f sleepq_wait(c479edac,0,c4362cbc,1,0,...) at sleepq_wait+0x63 _cv_wait(c479edac,c479ee4c,c0c8e386,6c,c479edb4,...) at _cv_wait+0x240 usb_process(c479eda4,c4362d38,c0c9afc8,343,c474b000,...) at usb_process+0x193 fork_exit(c07bf940,c479eda4,c4362d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4362d70, ebp = 0 --- Tracing command usb pid 14 tid 100033 td 0xc4765900 sched_switch(c4765900,0,104,191,363ab1f2,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4765900,0,c0ca4047,260,c4765900,...) at sleepq_switch+0x15f sleepq_wait(c479ed7c,0,c435fcbc,1,0,...) at sleepq_wait+0x63 _cv_wait(c479ed7c,c479ee4c,c0c8e386,6c,c479ed84,...) at _cv_wait+0x240 usb_process(c479ed74,c435fd38,c0c9afc8,343,c474b000,...) 
at usb_process+0x193 fork_exit(c07bf940,c479ed74,c435fd38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc435fd70, ebp = 0 --- Tracing command usb pid 14 tid 100032 td 0xc4765b40 sched_switch(c4765b40,0,104,191,b8b01550,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4765b40,0,c0ca4047,260,c4765b40,...) at sleepq_switch+0x15f sleepq_wait(c479ed4c,0,c435ccbc,1,0,...) at sleepq_wait+0x63 _cv_wait(c479ed4c,c479ee4c,c0c8e386,6c,c479ed54,...) at _cv_wait+0x240 usb_process(c479ed44,c435cd38,c0c9afc8,343,c474b000,...) at usb_process+0x193 fork_exit(c07bf940,c479ed44,c435cd38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc435cd70, ebp = 0 --- Tracing command usb pid 14 tid 100031 td 0xc4765d80 sched_switch(c4765d80,0,104,191,b8afe124,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4765d80,0,c0ca4047,260,c4765d80,...) at sleepq_switch+0x15f sleepq_wait(c479ed1c,0,c4359cbc,1,0,...) at sleepq_wait+0x63 _cv_wait(c479ed1c,c479ee4c,c0c8e386,6c,c479ed24,...) at _cv_wait+0x240 usb_process(c479ed14,c4359d38,c0c9afc8,343,c474b000,...) at usb_process+0x193 fork_exit(c07bf940,c479ed14,c4359d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4359d70, ebp = 0 --- Tracing command xpt_thrd pid 5 tid 100021 td 0xc4756480 sched_switch(c4756480,0,104,191,b8af9f54,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,4c,...) at mi_switch+0x200 sleepq_switch(c4756480,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0dca8d4,4c,c0c39ffa,0,0,...) at sleepq_wait+0x63 _sleep(c0dca8d4,c0dca8ec,4c,c0c39ffa,0,...) at _sleep+0x36b xpt_scanner_thread(0,c4314d38,c0c9afc8,343,c474b2a8,...) 
at xpt_scanner_thread+0x4a fork_exit(c0484aa0,0,c4314d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4314d70, ebp = 0 --- Tracing command yarrow pid 13 tid 100017 td 0xc457e240 sched_switch(c457e240,0,104,191,64ab96da,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c457e240,0,c0ca4047,283,2,...) at sleepq_switch+0x15f sleepq_timedwait(c0dfe524,0,c0c9544e,2,0,...) at sleepq_timedwait+0x6b _sleep(c0dfe524,0,0,c0c9544e,64,...) at _sleep+0x339 pause(c0c9544e,64,c0c82324,111,0,...) at pause+0x47 random_kthread(0,c4308d38,c0c9afc8,343,c474b550,...) at random_kthread+0x1ef fork_exit(c07382d0,0,c4308d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4308d70, ebp = 0 --- Tracing command g_down pid 4 tid 100016 td 0xc457e480 sched_switch(c457e480,0,104,191,6a9c320a,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,4c,...) at mi_switch+0x200 sleepq_switch(c457e480,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c0dfc2e4,4c,c0c9544e,0,0,...) at sleepq_wait+0x63 _sleep(c0dfc2e4,c0dfc248,24c,c0c9544e,0,...) at _sleep+0x36b g_io_schedule_down(c457e480,0,c0c96b16,74,0,...) at g_io_schedule_down+0x56 g_down_procbody(0,c4305d38,c0c9afc8,343,c457a000,...) at g_down_procbody+0x8d fork_exit(c082bcc0,0,c4305d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4305d70, ebp = 0 --- Tracing command g_up pid 3 tid 100015 td 0xc457e6c0 kdb_enter(c0c9faeb,c0c9faeb,c0c39b08,c4302b54,3,...) at kdb_enter+0x3a panic(c0c39b08,c6964280,0,c70afe80,0,...) at panic+0x136 handle_allocindir_partdone(c0f769ac,0,c0cc60e7,1c3d,c0dfe7b4,...) at handle_allocindir_partdone+0x45 softdep_disk_write_complete(d85dbe80,0,c0cc84f6,6d5,c4b767c4,...) at softdep_disk_write_complete+0xd4b ffs_backgroundwritedone(d85dbe80,c1879000,d85dbe80,c4aa3980,c4302c98,...) 
at ffs_backgroundwritedone+0xa3 bufdone(d85dbe80,c454e95c) at bufdone+0x53 g_vfs_done(c4aa3980,0,c0caaca8,c16,c4aa3980,...) at g_vfs_done+0x85 biodone(c4aa3980,c0dfc268,24c,c0c9544e,0,...) at biodone+0xa5 g_io_schedule_up(c457e6c0,0,c0c96b16,5d,0,...) at g_io_schedule_up+0xc7 g_up_procbody(0,c4302d38,c0c9afc8,343,c457a2a8,...) at g_up_procbody+0x8d fork_exit(c082bd50,0,c4302d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4302d70, ebp = 0 --- Tracing command g_event pid 2 tid 100014 td 0xc457e900 sched_switch(c457e900,0,104,191,61aa34d4,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,4c,...) at mi_switch+0x200 sleepq_switch(c457e900,0,c0ca4047,283,0,...) at sleepq_switch+0x15f sleepq_timedwait(c0dfc2d8,4c,c0c9544e,0,0,...) at sleepq_timedwait+0x6b _sleep(c0dfc2d8,0,4c,c0c9544e,64,...) at _sleep+0x339 g_event_procbody(0,c42ffd38,c0c9afc8,343,c457a550,...) at g_event_procbody+0xcb fork_exit(c082bde0,0,c42ffd38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42ffd70, ebp = 0 --- Tracing command intr pid 12 tid 100042 td 0xc481a900 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100040 td 0xc481ad80 sched_switch(c481ad80,0,109,191,6abce4d6,...) at sched_switch+0x406 mi_switch(109,0,c0c9b247,52d,c47b0570,...) at mi_switch+0x200 ithread_loop(c4815b20,e4ab5d38,c0c9afc8,343,c457a7f8,...) at ithread_loop+0x1f6 fork_exit(c0867be0,c4815b20,e4ab5d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4ab5d70, ebp = 0 --- Tracing command intr pid 12 tid 100039 td 0xc481b000 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100038 td 0xc4756d80 sched_switch(c4756d80,0,109,191,b8387920,...) at sched_switch+0x406 mi_switch(109,0,c0c9b247,52d,c4578070,...) at mi_switch+0x200 ithread_loop(c480dca0,e4aafd38,c0c9afc8,343,c457a7f8,...) 
at ithread_loop+0x1f6 fork_exit(c0867be0,c480dca0,e4aafd38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4aafd70, ebp = 0 --- Tracing command intr pid 12 tid 100037 td 0xc4765000 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100036 td 0xc4765240 sched_switch(c4765240,0,109,191,6aa23ca2,...) at sched_switch+0x406 mi_switch(109,0,c0c9b247,52d,c45c1370,...) at mi_switch+0x200 ithread_loop(c480d820,e4aa6d38,c0c9afc8,343,c457a7f8,...) at ithread_loop+0x1f6 fork_exit(c0867be0,c480d820,e4aa6d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4aa6d70, ebp = 0 --- Tracing command intr pid 12 tid 100035 td 0xc4765480 sched_switch(c4765480,0,109,191,9dac98a,...) at sched_switch+0x406 mi_switch(109,0,c0c9b247,52d,c45c11f0,...) at mi_switch+0x200 ithread_loop(c47afb10,e4aa0d38,c0c9afc8,343,c457a7f8,...) at ithread_loop+0x1f6 fork_exit(c0867be0,c47afb10,e4aa0d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe4aa0d70, ebp = 0 --- Tracing command intr pid 12 tid 100030 td 0xc4767000 sched_switch(c4767000,0,109,191,b8049ddc,...) at sched_switch+0x406 mi_switch(109,0,c0c9b247,52d,c45c1270,...) at mi_switch+0x200 ithread_loop(c4784830,c4356d38,c0c9afc8,343,c457a7f8,...) at ithread_loop+0x1f6 fork_exit(c0867be0,c4784830,c4356d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4356d70, ebp = 0 --- Tracing command intr pid 12 tid 100028 td 0xc4767480 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100027 td 0xc45c36c0 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100022 td 0xc4756240 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100020 td 0xc47566c0 sched_switch(c47566c0,0,109,191,8eb83312,...) at sched_switch+0x406 mi_switch(109,0,c0c9b247,52d,c4703bf0,...) 
at mi_switch+0x200 ithread_loop(c4516970,c4311d38,c0c9afc8,343,c457a7f8,...) at ithread_loop+0x1f6 fork_exit(c0867be0,c4516970,c4311d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4311d70, ebp = 0 --- Tracing command intr pid 12 tid 100019 td 0xc4756900 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100012 td 0xc457ed80 sched_switch(c457ed80,0,109,191,6a37af9e,...) at sched_switch+0x406 mi_switch(109,0,c0c9b247,52d,c45c1c70,...) at mi_switch+0x200 ithread_loop(c4579090,c42f9d38,c0c9afc8,343,c457a7f8,...) at ithread_loop+0x1f6 fork_exit(c0867be0,c4579090,c42f9d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42f9d70, ebp = 0 --- Tracing command intr pid 12 tid 100011 td 0xc45c3000 sched_switch(c45c3000,0,109,191,6662bd3e,...) at sched_switch+0x406 mi_switch(109,0,c0c9b247,52d,c45c1cf0,...) at mi_switch+0x200 ithread_loop(c45790a0,c42f6d38,c0c9afc8,343,c457a7f8,...) at ithread_loop+0x1f6 fork_exit(c0867be0,c45790a0,c42f6d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42f6d70, ebp = 0 --- Tracing command intr pid 12 tid 100010 td 0xc45c3240 sched_switch(c45c3240,0,109,191,57121e10,...) at sched_switch+0x406 mi_switch(109,0,c0c9b247,52d,c45c1d70,...) at mi_switch+0x200 ithread_loop(c45790b0,c42f3d38,c0c9afc8,343,c457a7f8,...) at ithread_loop+0x1f6 fork_exit(c0867be0,c45790b0,c42f3d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42f3d70, ebp = 0 --- Tracing command intr pid 12 tid 100009 td 0xc45c3480 sched_switch(c45c3480,0,109,191,6e8868c8,...) at sched_switch+0x406 mi_switch(109,0,c0c9b247,52d,c45c1df0,...) at mi_switch+0x200 ithread_loop(c45790c0,c42f0d38,c0c9afc8,343,c457a7f8,...) 
at ithread_loop+0x1f6 fork_exit(c0867be0,c45790c0,c42f0d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42f0d70, ebp = 0 --- Tracing command intr pid 12 tid 100008 td 0xc457c000 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100007 td 0xc457c240 sched_switch(c457c240,0,109,191,ba5b8e0c,...) at sched_switch+0x406 mi_switch(109,0,c0c9b247,52d,c45c1ef0,...) at mi_switch+0x200 ithread_loop(c45790e0,c42ead38,c0c9afc8,343,c457a7f8,...) at ithread_loop+0x1f6 fork_exit(c0867be0,c45790e0,c42ead38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42ead70, ebp = 0 --- Tracing command idle pid 11 tid 100006 td 0xc457c480 cpustop_handler(1,c42e4c38,c0bcc5e6,369e99,0,...) at cpustop_handler+0x32 ipi_nmi_handler(369e99,0,0,c42e4bfc,c457aaa0,...) at ipi_nmi_handler+0x2f trap(c42e4c44) at trap+0x36 calltrap() at calltrap+0x6 --- trap 0x13, eip = 0xc0bccd05, esp = 0xc42e4c84, ebp = 0xc42e4c84 --- rdtsc(c0e024c0,14,c0ca05a8,18c,c087ecb4,...) at rdtsc+0x5 mi_switch(108,0,c0ca1674,3ae,c457c480,...) at mi_switch+0x164 sched_idletd(0,c42e4d38,c0c9afc8,343,c457aaa0,...) at sched_idletd+0x19b fork_exit(c08b16a0,0,c42e4d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42e4d70, ebp = 0 --- Tracing command idle pid 11 tid 100005 td 0xc457c6c0 sched_switch(c457c6c0,0,108,18c,6a2615fc,...) at sched_switch+0x406 mi_switch(108,0,c0ca1674,3ae,c457c6c0,...) at mi_switch+0x200 sched_idletd(0,c42e1d38,c0c9afc8,343,c457aaa0,...) at sched_idletd+0x19b fork_exit(c08b16a0,0,c42e1d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42e1d70, ebp = 0 --- Tracing command idle pid 11 tid 100004 td 0xc457c900 sched_switch(c457c900,0,108,18c,6aa67092,...) at sched_switch+0x406 mi_switch(108,0,c0ca1674,3ae,c457c900,...) at mi_switch+0x200 sched_idletd(0,c42ded38,c0c9afc8,343,c457aaa0,...) 
at sched_idletd+0x19b fork_exit(c08b16a0,0,c42ded38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42ded70, ebp = 0 --- Tracing command idle pid 11 tid 100003 td 0xc457cb40 sched_switch(c457cb40,0,108,18c,6a8950ae,...) at sched_switch+0x406 mi_switch(108,0,c0ca1674,3ae,c457cb40,...) at mi_switch+0x200 sched_idletd(0,c42dbd38,c0c9afc8,343,c457aaa0,...) at sched_idletd+0x19b fork_exit(c08b16a0,0,c42dbd38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42dbd70, ebp = 0 --- Tracing command init pid 1 tid 100002 td 0xc457cd80 sched_switch(c457cd80,0,104,191,e505beb4,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,5c,...) at mi_switch+0x200 sleepq_switch(c457cd80,0,c0ca4047,1a0,5c,...) at sleepq_switch+0x15f sleepq_catch_signals(c0ca4047,160,0,100,100,...) at sleepq_catch_signals+0xb7 sleepq_wait_sig(c457ad48,5c,c0ca6869,100,0,...) at sleepq_wait_sig+0x17 _sleep(c457ad48,c457add0,15c,c0ca6869,0,...) at _sleep+0x354 kern_wait(c457cd80,ffffffff,c42d7c74,0,0,...) at kern_wait+0xb76 wait4(c457cd80,c42d7cf8,10,c0ca6699,c0d8bfe4,...) at wait4+0x3b syscall(c42d7d38) at syscall+0x2b4 Xint0x80_syscall() at Xint0x80_syscall+0x20 --- syscall (7, FreeBSD ELF32, wait4), eip = 0x8054def, esp = 0xbfbfe90c, ebp = 0xbfbfe928 --- Tracing command audit pid 10 tid 100001 td 0xc457e000 sched_switch(c457e000,0,104,191,b8aa2a74,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c457e000,0,c0ca4047,260,c457e000,...) at sleepq_switch+0x15f sleepq_wait(c0f76240,0,c42d4c9c,1,0,...) at sleepq_wait+0x63 _cv_wait(c0f76240,c0f76224,c0cc408c,194,0,...) at _cv_wait+0x240 audit_worker(0,c42d4d38,c0c9afc8,343,c457b000,...) 
at audit_worker+0x84 fork_exit(c0a949e0,0,c42d4d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42d4d70, ebp = 0 --- Tracing command kernel pid 0 tid 100029 td 0xc4767240 sched_switch(c4767240,0,104,191,b803f18c,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4767240,0,c0ca4047,260,c4767240,...) at sleepq_switch+0x15f sleepq_wait(c4782340,0,c0ca06e0,c0c9544e,0,...) at sleepq_wait+0x63 msleep_spin(c4782340,c4782358,c0c9544e,0,c0c9e273,...) at msleep_spin+0x21d taskqueue_thread_loop(c478a5a0,c4352d38,c0c9afc8,343,c0dfc3c0,...) at taskqueue_thread_loop+0x94 fork_exit(c08cae80,c478a5a0,c4352d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4352d70, ebp = 0 --- Tracing command kernel pid 0 tid 100026 td 0xc45c3900 sched_switch(c45c3900,0,104,191,1d509c1c,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c45c3900,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c4749180,0,c0c9544e,0,0,...) at sleepq_wait+0x63 _sleep(c4749180,c4749198,0,c0c9544e,0,...) at _sleep+0x36b taskqueue_thread_loop(c0dfcc50,c4323d38,c0c9afc8,343,c0dfc3c0,...) at taskqueue_thread_loop+0xba fork_exit(c08cae80,c0dfcc50,c4323d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4323d70, ebp = 0 --- Tracing command kernel pid 0 tid 100025 td 0xc45c3b40 sched_switch(c45c3b40,0,104,191,1d506ea4,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c45c3b40,0,c0ca4047,260,c45c3b40,...) at sleepq_switch+0x15f sleepq_wait(c47491c0,0,c0ca06e0,c0c9544e,0,...) at sleepq_wait+0x63 msleep_spin(c47491c0,c47491d8,c0c9544e,0,c0c9e273,...) at msleep_spin+0x21d taskqueue_thread_loop(c0dcd720,c4320d38,c0c9afc8,343,c0dfc3c0,...) 
at taskqueue_thread_loop+0x94 fork_exit(c08cae80,c0dcd720,c4320d38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc4320d70, ebp = 0 --- Tracing command kernel pid 0 tid 100024 td 0xc45c3d80 sched_switch(c45c3d80,0,104,191,1d504ab4,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c45c3d80,0,c0ca4047,260,c45c3d80,...) at sleepq_switch+0x15f sleepq_wait(c47491c0,0,c0ca06e0,c0c9544e,0,...) at sleepq_wait+0x63 msleep_spin(c47491c0,c47491d8,c0c9544e,0,c0c9e273,...) at msleep_spin+0x21d taskqueue_thread_loop(c0dcd720,c431dd38,c0c9afc8,343,c0dfc3c0,...) at taskqueue_thread_loop+0x94 fork_exit(c08cae80,c0dcd720,c431dd38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc431dd70, ebp = 0 --- Tracing command kernel pid 0 tid 100023 td 0xc4756000 sched_switch(c4756000,0,104,191,1d502188,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4756000,0,c0ca4047,260,c4756000,...) at sleepq_switch+0x15f sleepq_wait(c47491c0,0,c0ca06e0,c0c9544e,0,...) at sleepq_wait+0x63 msleep_spin(c47491c0,c47491d8,c0c9544e,0,c0c9e273,...) at msleep_spin+0x21d taskqueue_thread_loop(c0dcd720,c431ad38,c0c9afc8,343,c0dfc3c0,...) at taskqueue_thread_loop+0x94 fork_exit(c08cae80,c0dcd720,c431ad38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc431ad70, ebp = 0 --- Tracing command kernel pid 0 tid 100018 td 0xc4756b40 sched_switch(c4756b40,0,104,191,1d4c3e34,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c4756b40,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c4749600,0,c0c9544e,0,0,...) at sleepq_wait+0x63 _sleep(c4749600,c4749618,0,c0c9544e,0,...) at _sleep+0x36b taskqueue_thread_loop(c0e10f48,c430bd38,c0c9afc8,343,c0dfc3c0,...) 
at taskqueue_thread_loop+0xba fork_exit(c08cae80,c0e10f48,c430bd38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc430bd70, ebp = 0 --- Tracing command kernel pid 0 tid 100013 td 0xc457eb40 sched_switch(c457eb40,0,104,191,558e9d9a,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,0,...) at mi_switch+0x200 sleepq_switch(c457eb40,0,c0ca4047,260,0,...) at sleepq_switch+0x15f sleepq_wait(c4561dc0,0,c0c9544e,0,0,...) at sleepq_wait+0x63 _sleep(c4561dc0,c4561dd8,0,c0c9544e,0,...) at _sleep+0x36b taskqueue_thread_loop(c0e0f9e0,c42fcd38,c0c9afc8,343,c0dfc3c0,...) at taskqueue_thread_loop+0xba fork_exit(c08cae80,c0e0f9e0,c42fcd38) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc42fcd70, ebp = 0 --- Tracing command kernel pid 0 tid 100000 td 0xc0dfc670 sched_switch(c0dfc670,0,104,191,4b4b19cc,...) at sched_switch+0x406 mi_switch(104,0,c0ca4047,1eb,44,...) at mi_switch+0x200 sleepq_switch(c0dfc670,0,c0ca4047,283,0,...) at sleepq_switch+0x15f sleepq_timedwait(c0dfc3c0,44,c0ca1ee9,0,0,...) at sleepq_timedwait+0x6b _sleep(c0dfc3c0,0,44,c0ca1ee9,2710,...) at _sleep+0x339 scheduler(0,141ec00,141ec00,141e000,1425000,...) 
at scheduler+0x23e mi_startup() at mi_startup+0x96 begin() at begin+0x2c db:0:allt> call doadump Physical memory: 1007 MB Dumping 209 MB: 194 178 162 146 130 114 98 82 66 50 34 18 2 Dump complete = 0xf db:0:doadump> reset cpu_reset: Restarting BSP cpu_reset_proxy: Stopped CPU 3 (kgdb) bt #0 doadump () at pcpu.h:246 #1 0xc04d0999 in db_fncall (dummy1=0xc08be63a, dummy2=0x0, dummy3=0xffffffff, dummy4=0xc43027f4 "\b(0Ä") at ../../../ddb/db_command.c:548 #2 0xc04d0dcf in db_command (last_cmdp=0xc0dcb8fc, cmd_table=0x0, dopager=0x0) at ../../../ddb/db_command.c:445 #3 0xc04d0e84 in db_command_script (command=0xc0dcc808 "call doadump") at ../../../ddb/db_command.c:516 #4 0xc04d5000 in db_script_exec (scriptname=0xc0dcc160 "doadump", warnifnotfound=Variable "warnifnotfound" is not available. ) at ../../../ddb/db_script.c:302 #5 0xc04d5091 in db_run_cmd (addr=0x1, have_addr=0x0, count=0xc0fcb160, modif=0xc430292c "") at ../../../ddb/db_script.c:375 #6 0xc04d0d91 in db_command (last_cmdp=0xc0dcb8fc, cmd_table=0x0, dopager=0x1) at ../../../ddb/db_command.c:445 #7 0xc04d0eea in db_command_loop () at ../../../ddb/db_command.c:498 #8 0xc04d2d8d in db_trap (type=0x3, code=0x0) at ../../../ddb/db_main.c:229 #9 0xc08be4b6 in kdb_trap (type=0x3, code=0x0, tf=0xc4302ad4) at ../../../kern/subr_kdb.c:535 #10 0xc0bccbdb in trap (frame=0xc4302ad4) at ../../../i386/i386/trap.c:690 #11 0xc0bae63b in calltrap () at ../../../i386/i386/exception.s:165 #12 0xc08be63a in kdb_enter (why=0xc0c9faeb "panic", msg=0xc0c9faeb "panic") at cpufunc.h:71 #13 0xc088ea36 in panic (fmt=0xc0c39b08 "Bad link elm %p next->prev != elm") at ../../../kern/kern_shutdown.c:562 #14 0xc0abd3e5 in handle_allocindir_partdone (aip=0xc6964280) at ../../../ufs/ffs/ffs_softdep.c:7410 #15 0xc0ac6b4b in softdep_disk_write_complete (bp=0xd85dbe80) at ../../../ufs/ffs/ffs_softdep.c:7953 #16 0xc0acf103 in ffs_backgroundwritedone (bp=0xd85dbe80) at buf.h:411 #17 0xc0909943 in bufdone (bp=0xd85dbe80) at 
../../../kern/vfs_bio.c:3255 #18 0xc08308c5 in g_vfs_done (bip=0xc4aa3980) at ../../../geom/geom_vfs.c:97 #19 0xc0905f75 in biodone (bp=0xc4aa3980) at ../../../kern/vfs_bio.c:3096 #20 0xc082b627 in g_io_schedule_up (tp=0xc457e6c0) at ../../../geom/geom_io.c:669 #21 0xc082bddd in g_up_procbody () at ../../../geom/geom_kern.c:95 #22 0xc0864bf8 in fork_exit (callout=0xc082bd50 , arg=0x0, frame=0xc4302d38) at ../../../kern/kern_fork.c:843 #23 0xc0bae6b0 in fork_trampoline () at ../../../i386/i386/exception.s:270 (kgdb) f 14 #14 0xc0abd3e5 in handle_allocindir_partdone (aip=0xc6964280) at ../../../ufs/ffs/ffs_softdep.c:7410 7410 LIST_REMOVE(aip, ai_next); (kgdb) l 7405 struct indirdep *indirdep; 7406 7407 if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE) 7408 return; 7409 indirdep = aip->ai_indirdep; 7410 LIST_REMOVE(aip, ai_next); 7411 if (indirdep->ir_state & UNDONE) { 7412 LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next); 7413 return; 7414 } (kgdb) p *aip $1 = {ai_block = {nb_list = {wk_list = {le_next = 0xc71b0680, le_prev = 0xc71a6480}, wk_mp = 0xc4e19b50, wk_type = 0x6, wk_state = 0x800d}, nb_hash = {le_next = 0xc7162e00, le_prev = 0xc71cec90}, nb_deps = {le_next = 0xc7185700, le_prev = 0xc70afebc}, nb_jnewblk = 0x0, nb_bmsafemap = 0x0, nb_freefrag = 0x0, nb_indirdeps = {lh_first = 0x0}, nb_newdirblk = {lh_first = 0x0}, nb_jwork = {lh_first = 0x0}, nb_newblkno = 0x1860b0, nb_oldblkno = 0x0, nb_offset = 0x60f}, ai_next = {le_next = 0xc71a6480, le_prev = 0xc58d522c}, ai_indirdep = 0xc58d5200} (kgdb) p *aip->ai_indirdep $2 = {ir_list = {wk_list = {le_next = 0x0, le_prev = 0xd86fe10c}, wk_mp = 0xc4e19b50, wk_type = 0x5, wk_state = 0x8109}, ir_next = {le_next = 0x70707070, le_prev = 0x70707070}, ir_saveddata = 0x0, ir_savebp = 0xd86fdf80, ir_completehd = {lh_first = 0x0}, ir_writehd = {lh_first = 0x0}, ir_donehd = {lh_first = 0x0}, ir_deplisthd = {lh_first = 0x0}, ir_jwork = {lh_first = 0x0}} (kgdb) info loc indirdep = (struct indirdep *) 0xc58d5200 (kgdb) up #15 
0xc0ac6b4b in softdep_disk_write_complete (bp=0xd85dbe80) at ../../../ufs/ffs/ffs_softdep.c:7953 7953 handle_allocindir_partdone( (kgdb) p *bp $3 = {b_bufobj = 0xc4b767c4, b_bcount = 0x4000, b_caller1 = 0x0, b_data = 0xdacdd000 "", b_error = 0x0, b_iocmd = 0x2, b_ioflags = 0x2, b_iooffset = 0xb7c58000, b_resid = 0x0, b_iodone = 0, b_blkno = 0x5be2c0, b_offset = 0xb7c58000, b_bobufs = { tqe_next = 0xd8602320, tqe_prev = 0xd874a8f8}, b_left = 0x0, b_right = 0xd878c5c0, b_vflags = 0x0, b_freelist = {tqe_next = 0x0, tqe_prev = 0xc0f6ace0}, b_qindex = 0x0, b_flags = 0x24, b_xflags = 0x22, b_lock = {lock_object = { lo_name = 0xc0cab79e "bufwait", lo_flags = 0x5730000, lo_data = 0x0, lo_witness = 0xc4538030}, lk_lock = 0xfffffff0, lk_exslpfail = 0x0, lk_timo = 0x0, lk_pri = 0x50, lk_stack = {depth = 0xb, pcs = {0xc08769e8, 0xc0909bf3, 0xc0acf050, 0xc090a02c, 0xc0910b5d, 0xc080f9dc, 0xc0be2db5, 0xc09228bb, 0xc0922c03, 0xc0864bf8, 0xc0bae6b0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}}, b_bufsize = 0x4000, b_runningbufspace = 0x0, b_kvabase = 0xdacdd000 "", b_kvasize = 0x4000, b_lblkno = 0x5be2c0, b_vp = 0xc4b766cc, b_dirtyoff = 0x0, b_dirtyend = 0x0, b_rcred = 0x0, b_wcred = 0x0, b_saveaddr = 0xdacdd000, b_pager = {pg_reqpage = 0x0}, b_cluster = {cluster_head = {tqh_first = 0x0, tqh_last = 0x0}, cluster_entry = {tqe_next = 0x0, tqe_prev = 0x0}}, b_pages = {0xc1a87fd8, 0xc2887a00, 0xc2888978, 0xc272d050, 0x0 }, b_npages = 0x4, b_dep = {lh_first = 0x0}, b_fsprivate1 = 0x0, b_fsprivate2 = 0x0, b_fsprivate3 = 0x0, b_pin_count = 0x0} (kgdb) $ svn diff -x -p /usr/src/sys Index: /usr/src/sys/ufs/ufs/ufs_dirhash.c =================================================================== --- /usr/src/sys/ufs/ufs/ufs_dirhash.c (revision 200709) +++ /usr/src/sys/ufs/ufs/ufs_dirhash.c (working copy) @@ -68,8 +68,6 @@ __FBSDID("$FreeBSD$"); static MALLOC_DEFINE(M_DIRHASH, "ufs_dirhash", "UFS directory hash tables"); -static SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); - static 
int ufs_mindirhashsize = DIRBLKSIZ * 5; SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW, &ufs_mindirhashsize, Index: /usr/src/sys/ufs/ufs/ufs_vnops.c =================================================================== --- /usr/src/sys/ufs/ufs/ufs_vnops.c (revision 200709) +++ /usr/src/sys/ufs/ufs/ufs_vnops.c (working copy) @@ -114,6 +114,8 @@ static vop_close_t ufsfifo_close; static vop_kqfilter_t ufsfifo_kqfilter; static vop_pathconf_t ufsfifo_pathconf; +SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); + /* * A virgin directory (no blushing please). */ @@ -902,6 +904,9 @@ ufs_link(ap) error = EXDEV; goto out; } + if (VTOI(tdvp)->i_effnlink < 2) + panic("ufs_link: Bad link count %d on parent", + VTOI(tdvp)->i_effnlink); ip = VTOI(vp); if ((nlink_t)ip->i_nlink >= LINK_MAX) { error = EMLINK; @@ -916,7 +921,7 @@ ufs_link(ap) DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) - softdep_change_linkcnt(ip); + softdep_setup_link(VTOI(tdvp), ip); error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp))); if (!error) { ufs_makedirentry(ip, cnp, &newdir); @@ -929,7 +934,7 @@ ufs_link(ap) DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) - softdep_change_linkcnt(ip); + softdep_revert_link(VTOI(tdvp), ip); } out: return (error); @@ -990,6 +995,11 @@ ufs_whiteout(ap) return (error); } +static volatile int rename_restarts; +SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD, + __DEVOLATILE(int *, &rename_restarts), 0, + "Times rename had to restart due to lock contention"); + /* * Rename system call. 
* rename("foo", "bar"); @@ -1029,14 +1039,16 @@ ufs_rename(ap) struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; + struct vnode *nvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct thread *td = fcnp->cn_thread; - struct inode *ip, *xp, *dp; + struct inode *ip, *xp, *tdp, *fdp; struct direct newdir; - int doingdirectory = 0, oldparent = 0, newparent = 0; + int doingdirectory, newparent; int error = 0, ioflag; - ino_t fvp_ino; + struct mount *mp; + ino_t ino; #ifdef INVARIANTS if ((tcnp->cn_flags & HASBUF) == 0 || @@ -1049,7 +1061,6 @@ ufs_rename(ap) if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; -abortit: if (tdvp == tvp) vrele(tdvp); else @@ -1060,63 +1071,202 @@ ufs_rename(ap) vrele(fvp); return (error); } - + mp = tdvp->v_mount; + VOP_UNLOCK(tdvp, 0); + if (tvp && tvp != tdvp) + VOP_UNLOCK(tvp, 0); + error = vfs_busy(mp, 0); + if (error) { + mp = NULL; + goto releout; + } +relock: + /* + * We need to acquire 2 to 4 locks depending on whether tvp is NULL + * and fdvp and tdvp are the same directory. Subsequently we need + * to double-check all paths and in the directory rename case we + * need to verify that we are not creating a directory loop. To + * handle this we acquire fdvp and fvp in order using blocking + * locks followed by non-blocking acquisitions for all remaining + * locks. If we fail to acquire any lock in the path we will + * drop all held locks, acquire the new lock in a blocking fashion, + * and then release it and restart the rename. This acquire/release + * step ensures that we do not spin on a lock waiting for release. 
+ */ + error = vn_lock(fdvp, LK_EXCLUSIVE); + if (error) + goto releout; + error = vn_lock(fvp, LK_EXCLUSIVE); + if (error) { + VOP_UNLOCK(fdvp, 0); + goto releout; + } + if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(fvp, 0); + error = vn_lock(tdvp, LK_EXCLUSIVE); + if (error) + goto releout; + VOP_UNLOCK(tdvp, 0); + atomic_add_int(&rename_restarts, 1); + goto relock; + } + /* + * If vn_lock fails due to VI_DOOMED being set on fdvp, fvp, or tdvp + * it is a fatal condition. If it occurs on tvp we carry on as if + * it was removed and ufs_lookup_ino() below will resolve the + * condition that cleared it. + */ + if (tvp && vn_lock(tvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + VOP_UNLOCK(fvp, 0); + error = vn_lock(tvp, LK_EXCLUSIVE); + if (error) + tvp = NULL; + else + VOP_UNLOCK(tvp, 0); + atomic_add_int(&rename_restarts, 1); + goto relock; + } + fdp = VTOI(fdvp); + ip = VTOI(fvp); + tdp = VTOI(tdvp); + xp = NULL; + if (tvp) + xp = VTOI(tvp); + /* + * Re-resolve fvp to be certain it still exists at the same inode + * number. If the lookup fails abort the rename. If the inode + * number has changed we need to VGET it and then restart the + * whole lock operation. + */ + error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); + if (error) + goto unlockout; + if (ino != ip->i_number) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + vput(fvp); + if (tvp) + VOP_UNLOCK(tvp, 0); + error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &fvp); + if (error) + goto releout; + VOP_UNLOCK(fvp, 0); + atomic_add_int(&rename_restarts, 1); + goto relock; + } + /* + * Re-lookup to now that all of the locks are held. + */ + error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino); + if (error != 0 && error != EJUSTRETURN) + goto unlockout; + /* + * If tvp disappeared we just need to restart. 
+ */ + if (error == EJUSTRETURN && tvp != NULL) { + vput(tvp); + tvp = NULL; + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + VOP_UNLOCK(fvp, 0); + atomic_add_int(&rename_restarts, 1); + goto relock; + } + /* + * If tvp changed out from under us we need to drop the old one, + * VGET the new one, and restart. + */ + if (error == 0 && (tvp == NULL || xp->i_number != ino)) { + if (tvp != NULL) + vput(tvp); + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + VOP_UNLOCK(fvp, 0); + error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &tvp); + if (error) + tvp = NULL; + else + VOP_UNLOCK(tvp, 0); + atomic_add_int(&rename_restarts, 1); + goto relock; + } if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; - goto abortit; + goto unlockout; } - /* * Renaming a file to itself has no effect. The upper layers should - * not call us in that case. Temporarily just warn if they do. + * not call us in that case. */ - if (fvp == tvp) { - printf("ufs_rename: fvp == tvp (can't happen)\n"); - error = 0; - goto abortit; - } - - if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) - goto abortit; - dp = VTOI(fdvp); - ip = VTOI(fvp); + if (fvp == tvp) + panic("ufs_rename: fvp == tvp (can't happen)\n"); + doingdirectory = 0; + newparent = 0; + ino = ip->i_number; if (ip->i_nlink >= LINK_MAX) { - VOP_UNLOCK(fvp, 0); error = EMLINK; - goto abortit; + goto unlockout; } if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) - || (dp->i_flags & APPEND)) { - VOP_UNLOCK(fvp, 0); + || (fdp->i_flags & APPEND)) { error = EPERM; - goto abortit; + goto unlockout; } if ((ip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. 
*/ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || - dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || - (ip->i_flag & IN_RENAME)) { - VOP_UNLOCK(fvp, 0); + fdp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { error = EINVAL; - goto abortit; + goto unlockout; } - ip->i_flag |= IN_RENAME; - oldparent = dp->i_number; + if (fdp->i_number != tdp->i_number) + newparent = tdp->i_number; doingdirectory = 1; } - vrele(fdvp); /* - * When the target exists, both the directory - * and target vnodes are returned locked. + * If ".." must be changed (ie the directory gets a new + * parent) then the source directory must not be in the + * directory hierarchy above the target, as this would + * orphan everything below the source directory. Also + * the user must have write permission in the source so + * as to be able to change "..". */ - dp = VTOI(tdvp); - xp = NULL; - if (tvp) - xp = VTOI(tvp); + if (doingdirectory && newparent) { + error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); + if (error) + goto unlockout; + error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, + &ino); + /* + * We encountered a lock that we have to wait for. Unlock + * everything else and VGET before restarting. 
+ */ + if (ino) { + VOP_UNLOCK(tdvp, 0); + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(fvp, 0); + if (tvp) + VOP_UNLOCK(tvp, 0); + error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); + if (error == 0) + vput(nvp); + atomic_add_int(&rename_restarts, 1); + goto relock; + } + if (error) + goto unlockout; + if ((tcnp->cn_flags & SAVESTART) == 0) + panic("ufs_rename: lost to startdir"); + } + if (ip->i_effnlink == 0 || fdp->i_effnlink == 0 || tdp->i_effnlink == 0) + panic("Bad effnlink ip %p, fdp %p, tdp %p", ip, fdp, tdp); /* * 1) Bump link count while we're moving stuff @@ -1129,49 +1279,12 @@ ufs_rename(ap) DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(fvp)) - softdep_change_linkcnt(ip); - if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | - DOINGASYNC(fvp)))) != 0) { - VOP_UNLOCK(fvp, 0); + softdep_setup_link(tdp, ip); + error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp))); + if (error) goto bad; - } /* - * If ".." must be changed (ie the directory gets a new - * parent) then the source directory must not be in the - * directory hierarchy above the target, as this would - * orphan everything below the source directory. Also - * the user must have write permission in the source so - * as to be able to change "..". We must repeat the call - * to namei, as the parent directory is unlocked by the - * call to checkpath(). 
- */ - error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); - fvp_ino = ip->i_number; - VOP_UNLOCK(fvp, 0); - if (oldparent != dp->i_number) - newparent = dp->i_number; - if (doingdirectory && newparent) { - if (error) /* write access check above */ - goto bad; - if (xp != NULL) - vput(tvp); - error = ufs_checkpath(fvp_ino, dp, tcnp->cn_cred); - if (error) - goto out; - if ((tcnp->cn_flags & SAVESTART) == 0) - panic("ufs_rename: lost to startdir"); - VREF(tdvp); - error = relookup(tdvp, &tvp, tcnp); - if (error) - goto out; - vrele(tdvp); - dp = VTOI(tdvp); - xp = NULL; - if (tvp) - xp = VTOI(tvp); - } - /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory @@ -1179,46 +1292,27 @@ ufs_rename(ap) * expunge the original entry's existence. */ if (xp == NULL) { - if (dp->i_dev != ip->i_dev) + if (tdp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); - /* - * Account for ".." in new directory. - * When source and destination have the same - * parent we don't fool with the link count. - */ if (doingdirectory && newparent) { - if ((nlink_t)dp->i_nlink >= LINK_MAX) { + /* + * Account for ".." in new directory. + * When source and destination have the same + * parent we don't adjust the link count. The + * actual link modification is completed when + * .. is rewritten below. 
+ */ + if ((nlink_t)tdp->i_nlink >= LINK_MAX) { error = EMLINK; goto bad; } - dp->i_effnlink++; - dp->i_nlink++; - DIP_SET(dp, i_nlink, dp->i_nlink); - dp->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(tdvp)) - softdep_change_linkcnt(dp); - error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) | - DOINGASYNC(tdvp))); - if (error) - goto bad; } ufs_makedirentry(ip, tcnp, &newdir); error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); - if (error) { - if (doingdirectory && newparent) { - dp->i_effnlink--; - dp->i_nlink--; - DIP_SET(dp, i_nlink, dp->i_nlink); - dp->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(tdvp)) - softdep_change_linkcnt(dp); - (void)UFS_UPDATE(tdvp, 1); - } + if (error) goto bad; - } - vput(tdvp); } else { - if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) + if (xp->i_dev != tdp->i_dev || xp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). @@ -1231,7 +1325,7 @@ ufs_rename(ap) * destination of the rename. This implements append-only * directories. */ - if ((dp->i_mode & S_ISTXT) && + if ((tdp->i_mode & S_ISTXT) && VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { error = EPERM; @@ -1242,9 +1336,9 @@ ufs_rename(ap) * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ - if ((xp->i_mode&IFMT) == IFDIR) { + if ((xp->i_mode & IFMT) == IFDIR) { if ((xp->i_effnlink > 2) || - !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { + !ufs_dirempty(xp, tdp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } @@ -1257,16 +1351,16 @@ ufs_rename(ap) error = EISDIR; goto bad; } - error = ufs_dirrewrite(dp, xp, ip->i_number, + error = ufs_dirrewrite(tdp, xp, ip->i_number, IFTODT(ip->i_mode), (doingdirectory && newparent) ? 
newparent : doingdirectory); if (error) goto bad; if (doingdirectory) { if (!newparent) { - dp->i_effnlink--; + tdp->i_effnlink--; if (DOINGSOFTDEP(tdvp)) - softdep_change_linkcnt(dp); + softdep_change_linkcnt(tdp); } xp->i_effnlink--; if (DOINGSOFTDEP(tvp)) @@ -1285,9 +1379,9 @@ ufs_rename(ap) * them now. */ if (!newparent) { - dp->i_nlink--; - DIP_SET(dp, i_nlink, dp->i_nlink); - dp->i_flag |= IN_CHANGE; + tdp->i_nlink--; + DIP_SET(tdp, i_nlink, tdp->i_nlink); + tdp->i_flag |= IN_CHANGE; } xp->i_nlink--; DIP_SET(xp, i_nlink, xp->i_nlink); @@ -1295,105 +1389,80 @@ ufs_rename(ap) ioflag = IO_NORMAL; if (!DOINGASYNC(tvp)) ioflag |= IO_SYNC; + /* Don't go to bad here as the new link exists. */ if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag, tcnp->cn_cred, tcnp->cn_thread)) != 0) - goto bad; + goto unlockout; } - vput(tdvp); - vput(tvp); - xp = NULL; } /* - * 3) Unlink the source. + * 3) Unlink the source. We have to resolve the path again to + * fixup the directory offset and count for ufs_dirremove. */ - fcnp->cn_flags &= ~MODMASK; - fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; - if ((fcnp->cn_flags & SAVESTART) == 0) - panic("ufs_rename: lost from startdir"); - VREF(fdvp); - error = relookup(fdvp, &fvp, fcnp); - if (error == 0) - vrele(fdvp); - if (fvp != NULL) { - xp = VTOI(fvp); - dp = VTOI(fdvp); - } else { - /* - * From name has disappeared. IN_RENAME is not sufficient - * to protect against directory races due to timing windows, - * so we have to remove the panic. XXX the only real way - * to solve this issue is at a much higher level. By the - * time we hit ufs_rename() it's too late. - */ -#if 0 - if (doingdirectory) - panic("ufs_rename: lost dir entry"); -#endif - vrele(ap->a_fvp); - return (0); + if (fdvp == tdvp) { + error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); + if (error) + panic("ufs_rename: from entry went away!"); } /* - * Ensure that the directory entry still exists and has not - * changed while the new name has been entered. 
If the source is - * a file then the entry may have been unlinked or renamed. In - * either case there is no further work to be done. If the source - * is a directory then it cannot have been rmdir'ed; the IN_RENAME - * flag ensures that it cannot be moved by another rename or removed - * by a rmdir. + * If the source is a directory with a + * new parent, the link count of the old + * parent directory must be decremented + * and ".." set to point to the new parent. */ - if (xp != ip) { + if (doingdirectory && newparent) { /* - * From name resolves to a different inode. IN_RENAME is - * not sufficient protection against timing window races - * so we can't panic here. XXX the only real way - * to solve this issue is at a much higher level. By the - * time we hit ufs_rename() it's too late. + * If xp exists we simply use its link, otherwise we must + * add a new one. */ -#if 0 - if (doingdirectory) - panic("ufs_rename: lost dir entry"); -#endif - } else { - /* - * If the source is a directory with a - * new parent, the link count of the old - * parent directory must be decremented - * and ".." set to point to the new parent. - */ - if (doingdirectory && newparent) { - xp->i_offset = mastertemplate.dot_reclen; - ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); - cache_purge(fdvp); + if (xp == NULL) { + tdp->i_effnlink++; + tdp->i_nlink++; + DIP_SET(tdp, i_nlink, tdp->i_nlink); + tdp->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(tdvp)) + softdep_setup_dotdot_link(tdp, ip); + error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) | + DOINGASYNC(tdvp))); + /* Don't go to bad here as the new link exists. 
*/ + if (error) + goto unlockout; } - error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); - xp->i_flag &= ~IN_RENAME; + ip->i_offset = mastertemplate.dot_reclen; + ufs_dirrewrite(ip, fdp, newparent, DT_DIR, 0); + cache_purge(fdvp); } - if (dp) - vput(fdvp); - if (xp) - vput(fvp); - vrele(ap->a_fvp); + error = ufs_dirremove(fdvp, ip, fcnp->cn_flags, 0); + +unlockout: + vput(tdvp); + vput(fdvp); + vput(fvp); + if (tvp) + vput(tvp); + if (mp) + vfs_unbusy(mp); return (error); bad: - if (xp) - vput(ITOV(xp)); - vput(ITOV(dp)); -out: - if (doingdirectory) - ip->i_flag &= ~IN_RENAME; - if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { - ip->i_effnlink--; - ip->i_nlink--; - DIP_SET(ip, i_nlink, ip->i_nlink); - ip->i_flag |= IN_CHANGE; - ip->i_flag &= ~IN_RENAME; - if (DOINGSOFTDEP(fvp)) - softdep_change_linkcnt(ip); - vput(fvp); - } else - vrele(fvp); + ip->i_effnlink--; + ip->i_nlink--; + DIP_SET(ip, i_nlink, ip->i_nlink); + ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(fvp)) + softdep_revert_link(tdp, ip); + goto unlockout; + +releout: + vrele(tdvp); + if (tvp) + vrele(tvp); + vrele(fdvp); + vrele(fvp); + if (mp) + vfs_unbusy(mp); + return (error); } @@ -1565,8 +1634,7 @@ ufs_mkdir(ap) ip->i_effnlink = 2; ip->i_nlink = 2; DIP_SET(ip, i_nlink, 2); - if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); @@ -1582,8 +1650,8 @@ ufs_mkdir(ap) DIP_SET(dp, i_nlink, dp->i_nlink); dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(dvp)) - softdep_change_linkcnt(dp); - error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); + softdep_setup_mkdir(dp, ip); + error = UFS_UPDATE(dvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); if (error) goto bad; #ifdef MAC @@ -1701,8 +1769,6 @@ bad: dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); dp->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(dvp)) - softdep_change_linkcnt(dp); /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set 
the link count to 0. @@ -1712,7 +1778,8 @@ bad: DIP_SET(ip, i_nlink, 0); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + softdep_revert_mkdir(dp, ip); + vput(tvp); } out: @@ -1748,10 +1815,13 @@ ufs_rmdir(ap) * tries to remove a locally mounted on directory). */ error = 0; - if ((ip->i_flag & IN_RENAME) || ip->i_effnlink < 2) { + if (ip->i_effnlink < 2) { error = EINVAL; goto out; } + if (dp->i_effnlink < 3) + panic("ufs_dirrem: Bad link count %d on parent", + dp->i_effnlink); if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; @@ -1775,18 +1845,14 @@ ufs_rmdir(ap) */ dp->i_effnlink--; ip->i_effnlink--; - if (DOINGSOFTDEP(vp)) { - softdep_change_linkcnt(dp); - softdep_change_linkcnt(ip); - } + if (DOINGSOFTDEP(vp)) + softdep_setup_rmdir(dp, ip); error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) { dp->i_effnlink++; ip->i_effnlink++; - if (DOINGSOFTDEP(vp)) { - softdep_change_linkcnt(dp); - softdep_change_linkcnt(ip); - } + if (DOINGSOFTDEP(vp)) + softdep_revert_rmdir(dp, ip); goto out; } cache_purge(dvp); @@ -2282,6 +2348,9 @@ ufs_makeinode(mode, dvp, vpp, cnp) if ((mode & IFMT) == 0) mode |= IFREG; + if (VTOI(dvp)->i_effnlink < 2) + panic("ufs_makeinode: Bad link count %d on parent", + VTOI(dvp)->i_effnlink); error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) return (error); @@ -2411,7 +2480,7 @@ ufs_makeinode(mode, dvp, vpp, cnp) ip->i_nlink = 1; DIP_SET(ip, i_nlink, 1); if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + softdep_setup_create(VTOI(dvp), ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) { ip->i_mode &= ~ISGID; @@ -2484,7 +2553,7 @@ bad: DIP_SET(ip, i_nlink, 0); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + softdep_revert_create(VTOI(dvp), ip); vput(tvp); return (error); } Index: /usr/src/sys/ufs/ufs/ufsmount.h 
=================================================================== --- /usr/src/sys/ufs/ufs/ufsmount.h (revision 200709) +++ /usr/src/sys/ufs/ufs/ufsmount.h (working copy) @@ -57,6 +57,7 @@ struct ucred; struct uio; struct vnode; struct ufs_extattr_per_mount; +struct jblocks; /* This structure describes the UFS specific mount structure data. */ struct ufsmount { @@ -75,6 +76,10 @@ struct ufsmount { long um_numindirdeps; /* outstanding indirdeps */ struct workhead softdep_workitem_pending; /* softdep work queue */ struct worklist *softdep_worklist_tail; /* Tail pointer for above */ + struct workhead softdep_journal_pending; /* journal work queue */ + struct worklist *softdep_journal_tail; /* Tail pointer for above */ + struct jblocks *softdep_jblocks; /* Journal block information */ + int softdep_on_journal; /* Items on the journal list */ int softdep_on_worklist; /* Items on the worklist */ int softdep_on_worklist_inprogress; /* Busy items on worklist */ int softdep_deps; /* Total dependency count */ Index: /usr/src/sys/ufs/ufs/ufs_lookup.c =================================================================== --- /usr/src/sys/ufs/ufs/ufs_lookup.c (revision 200709) +++ /usr/src/sys/ufs/ufs/ufs_lookup.c (working copy) @@ -77,9 +77,6 @@ SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, /* true if old FS format...*/ #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) -static int ufs_lookup_(struct vnode *, struct vnode **, struct componentname *, - ino_t *); - /* * Convert a component of a pathname into a pointer to a locked inode. * This is a very central and rather complicated routine. 
@@ -134,11 +131,11 @@ ufs_lookup(ap) } */ *ap; { - return (ufs_lookup_(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL)); + return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL)); } -static int -ufs_lookup_(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp, +int +ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp, ino_t *dd_ino) { struct inode *dp; /* inode for directory being searched */ @@ -464,6 +461,8 @@ notfound: return (ENOENT); found: + if (dd_ino != NULL) + *dd_ino = ino; if (numdirpasses == 2) nchstats.ncs_pass2++; /* @@ -486,11 +485,6 @@ found: if ((flags & ISLASTCN) && nameiop == LOOKUP) dp->i_diroff = i_offset &~ (DIRBLKSIZ - 1); - if (dd_ino != NULL) { - *dd_ino = ino; - return (0); - } - /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. @@ -520,6 +514,8 @@ found: dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; + if (dd_ino != NULL) + return (0); if (dp->i_number == ino) { VREF(vdp); *vpp = vdp; @@ -560,6 +556,8 @@ found: dp->i_offset = i_offset; if (dp->i_number == ino) return (EISDIR); + if (dd_ino != NULL) + return (0); if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE, &tdp)) != 0) return (error); @@ -567,6 +565,8 @@ found: cnp->cn_flags |= SAVENAME; return (0); } + if (dd_ino != NULL) + return (0); /* * Step through the translation in the name. We do not `vput' the @@ -598,7 +598,7 @@ found: * to the inode we looked up before vdp lock was * dropped. */ - error = ufs_lookup_(pdp, NULL, cnp, &ino1); + error = ufs_lookup_ino(pdp, NULL, cnp, &ino1); if (error) { vput(tdp); return (error); @@ -819,29 +819,12 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } - if (softdep_setup_directory_add(bp, dp, dp->i_offset, - dirp->d_ino, newdirbp, 1) == 0) { - bdwrite(bp); - return (UFS_UPDATE(dvp, 0)); - } - /* We have just allocated a directory block in an - * indirect block. 
Rather than tracking when it gets - * claimed by the inode, we simply do a VOP_FSYNC - * now to ensure that it is there (in case the user - * does a future fsync). Note that we have to unlock - * the inode for the entry that we just entered, as - * the VOP_FSYNC may need to lock other inodes which - * can lead to deadlock if we also hold a lock on - * the newly entered node. - */ - if ((error = bwrite(bp))) - return (error); - if (tvp != NULL) - VOP_UNLOCK(tvp, 0); - error = VOP_FSYNC(dvp, MNT_WAIT, td); - if (tvp != NULL) - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); - return (error); + softdep_setup_directory_add(bp, dp, dp->i_offset, + dirp->d_ino, newdirbp, 1); + if (newdirbp) + bdwrite(newdirbp); + bdwrite(bp); + return (UFS_UPDATE(dvp, 0)); } if (DOINGASYNC(dvp)) { bdwrite(bp); @@ -976,6 +959,8 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) (void) softdep_setup_directory_add(bp, dp, dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp, 0); + if (newdirbp) + bdwrite(newdirbp); bdwrite(bp); } else { if (DOINGASYNC(dvp)) { @@ -1084,7 +1069,7 @@ out: if (DOINGSOFTDEP(dvp)) { if (ip) { ip->i_effnlink--; - softdep_change_linkcnt(ip); + softdep_setup_unlink(dp, ip); softdep_setup_remove(bp, dp, ip, isrmdir); } if (softdep_slowdown(dvp)) { @@ -1146,7 +1131,7 @@ ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) ep->d_type = newtype; oip->i_effnlink--; if (DOINGSOFTDEP(vdp)) { - softdep_change_linkcnt(oip); + softdep_setup_unlink(dp, oip); softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); bdwrite(bp); } else { @@ -1267,25 +1252,25 @@ ufs_dir_dd_ino(struct vnode *vp, struct ucred *cre /* * Check if source directory is in the path of the target directory. - * Target is supplied locked, source is unlocked. - * The target is always vput before returning. 
*/ int -ufs_checkpath(ino_t source_ino, struct inode *target, struct ucred *cred) +ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct ucred *cred, ino_t *wait_ino) { - struct vnode *vp, *vp1; + struct mount *mp; + struct vnode *tvp, *vp, *vp1; int error; ino_t dd_ino; - vp = ITOV(target); - if (target->i_number == source_ino) { - error = EEXIST; - goto out; - } - error = 0; + vp = tvp = ITOV(target); + mp = vp->v_mount; + *wait_ino = 0; + if (target->i_number == source_ino) + return (EEXIST); + if (target->i_number == parent_ino) + return (0); if (target->i_number == ROOTINO) - goto out; - + return (0); + error = 0; for (;;) { error = ufs_dir_dd_ino(vp, cred, &dd_ino); if (error != 0) @@ -1296,9 +1281,13 @@ int } if (dd_ino == ROOTINO) break; - error = vn_vget_ino(vp, dd_ino, LK_EXCLUSIVE, &vp1); - if (error != 0) + if (dd_ino == parent_ino) break; + error = VFS_VGET(mp, dd_ino, LK_EXCLUSIVE | LK_NOWAIT, &vp1); + if (error != 0) { + *wait_ino = dd_ino; + break; + } /* Recheck that ".." still points to vp1 after relock of vp */ error = ufs_dir_dd_ino(vp, cred, &dd_ino); if (error != 0) { @@ -1310,14 +1299,14 @@ int vput(vp1); continue; } - vput(vp); + if (vp != tvp) + vput(vp); vp = vp1; } -out: if (error == ENOTDIR) - printf("checkpath: .. not a directory\n"); - if (vp != NULL) + panic("checkpath: .. 
not a directory\n"); + if (vp != tvp) vput(vp); return (error); } Index: /usr/src/sys/ufs/ufs/ufs_extern.h =================================================================== --- /usr/src/sys/ufs/ufs/ufs_extern.h (revision 200709) +++ /usr/src/sys/ufs/ufs/ufs_extern.h (working copy) @@ -57,7 +57,7 @@ int ufs_bmap(struct vop_bmap_args *); int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *, struct buf *, int *, int *); int ufs_fhtovp(struct mount *, struct ufid *, struct vnode **); -int ufs_checkpath(ino_t, struct inode *, struct ucred *); +int ufs_checkpath(ino_t, ino_t, struct inode *, struct ucred *, ino_t *); void ufs_dirbad(struct inode *, doff_t, char *); int ufs_dirbadentry(struct vnode *, struct direct *, int); int ufs_dirempty(struct inode *, ino_t, struct ucred *); @@ -69,6 +69,8 @@ int ufs_direnter(struct vnode *, struct vnode *, struct componentname *, struct buf *); int ufs_dirremove(struct vnode *, struct inode *, int, int); int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int); +int ufs_lookup_ino(struct vnode *, struct vnode **, struct componentname *, + ino_t *); int ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *); int ufs_inactive(struct vop_inactive_args *); int ufs_init(struct vfsconf *); @@ -81,6 +83,9 @@ vfs_root_t ufs_root; int ufs_uninit(struct vfsconf *); int ufs_vinit(struct mount *, struct vop_vector *, struct vnode **); +#include <sys/sysctl.h> +SYSCTL_DECL(_vfs_ufs); + /* * Soft update function prototypes. 
*/ @@ -94,6 +99,17 @@ void softdep_setup_directory_change(struct buf *, void softdep_change_linkcnt(struct inode *); void softdep_releasefile(struct inode *); int softdep_slowdown(struct vnode *); +void softdep_setup_create(struct inode *, struct inode *); +void softdep_setup_dotdot_link(struct inode *, struct inode *); +void softdep_setup_link(struct inode *, struct inode *); +void softdep_setup_mkdir(struct inode *, struct inode *); +void softdep_setup_rmdir(struct inode *, struct inode *); +void softdep_setup_unlink(struct inode *, struct inode *); +void softdep_revert_create(struct inode *, struct inode *); +void softdep_revert_dotdot_link(struct inode *, struct inode *); +void softdep_revert_link(struct inode *, struct inode *); +void softdep_revert_mkdir(struct inode *, struct inode *); +void softdep_revert_rmdir(struct inode *, struct inode *); /* * Flags to low-level allocation routines. The low 16-bits are reserved Index: /usr/src/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_vfsops.c (revision 200709) +++ /usr/src/sys/ufs/ffs/ffs_vfsops.c (working copy) @@ -858,6 +858,7 @@ ffs_mountfs(devvp, mp, td) */ bzero(fs->fs_fsmnt, MAXMNTLEN); strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN); + mp->mnt_stat.f_iosize = fs->fs_bsize; if( mp->mnt_flag & MNT_ROOTFS) { /* @@ -899,7 +900,6 @@ ffs_mountfs(devvp, mp, td) * This would all happen while the filesystem was busy/not * available, so would effectively be "atomic". 
*/ - mp->mnt_stat.f_iosize = fs->fs_bsize; (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ @@ -1829,9 +1829,6 @@ ffs_bufwrite(struct buf *bp) } BO_UNLOCK(bp->b_bufobj); - /* Mark the buffer clean */ - bundirty(bp); - /* * If this buffer is marked for background writing and we * do not have to wait for it, make a copy and write the @@ -1872,9 +1869,16 @@ ffs_bufwrite(struct buf *bp) newbp->b_flags &= ~B_INVAL; #ifdef SOFTUPDATES - /* move over the dependencies */ - if (!LIST_EMPTY(&bp->b_dep)) - softdep_move_dependencies(bp, newbp); + /* + * Move over the dependencies. If there are rollbacks, + * leave the parent buffer dirtied as it will need to + * be written again. + */ + if (LIST_EMPTY(&bp->b_dep) || + softdep_move_dependencies(bp, newbp) == 0) + bundirty(bp); +#else + bundirty(bp); #endif /* @@ -1887,8 +1891,11 @@ ffs_bufwrite(struct buf *bp) */ bqrelse(bp); bp = newbp; - } + } else + /* Mark the buffer clean */ + bundirty(bp); + /* Let the normal bufwrite do the rest for us */ normal_write: return (bufwrite(bp)); Index: /usr/src/sys/ufs/ffs/ffs_softdep.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_softdep.c (revision 200709) +++ /usr/src/sys/ufs/ffs/ffs_softdep.c (working copy) @@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -130,10 +131,12 @@ softdep_setup_inomapdep(bp, ip, newinum) } void -softdep_setup_blkmapdep(bp, mp, newblkno) +softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags) struct buf *bp; struct mount *mp; ufs2_daddr_t newblkno; + int frags; + int oldfrags; { panic("softdep_setup_blkmapdep called"); @@ -403,31 +406,13 @@ softdep_get_depcounts(struct mount *mp, * These definitions need to be adapted to the system to which * this file is being ported. */ -/* - * malloc types defined for the softdep system. 
- */ -static MALLOC_DEFINE(M_PAGEDEP, "pagedep","File page dependencies"); -static MALLOC_DEFINE(M_INODEDEP, "inodedep","Inode dependencies"); -static MALLOC_DEFINE(M_NEWBLK, "newblk","New block allocation"); -static MALLOC_DEFINE(M_BMSAFEMAP, "bmsafemap","Block or frag allocated from cyl group map"); -static MALLOC_DEFINE(M_ALLOCDIRECT, "allocdirect","Block or frag dependency for an inode"); -static MALLOC_DEFINE(M_INDIRDEP, "indirdep","Indirect block dependencies"); -static MALLOC_DEFINE(M_ALLOCINDIR, "allocindir","Block dependency for an indirect block"); -static MALLOC_DEFINE(M_FREEFRAG, "freefrag","Previously used frag for an inode"); -static MALLOC_DEFINE(M_FREEBLKS, "freeblks","Blocks freed from an inode"); -static MALLOC_DEFINE(M_FREEFILE, "freefile","Inode deallocated"); -static MALLOC_DEFINE(M_DIRADD, "diradd","New directory entry"); -static MALLOC_DEFINE(M_MKDIR, "mkdir","New directory"); -static MALLOC_DEFINE(M_DIRREM, "dirrem","Directory entry deleted"); -static MALLOC_DEFINE(M_NEWDIRBLK, "newdirblk","Unclaimed new directory block"); -static MALLOC_DEFINE(M_SAVEDINO, "savedino","Saved inodes"); #define M_SOFTDEP_FLAGS (M_WAITOK | M_USE_RESERVE) #define D_PAGEDEP 0 #define D_INODEDEP 1 -#define D_NEWBLK 2 -#define D_BMSAFEMAP 3 +#define D_BMSAFEMAP 2 +#define D_NEWBLK 3 #define D_ALLOCDIRECT 4 #define D_INDIRDEP 5 #define D_ALLOCINDIR 6 @@ -438,8 +423,62 @@ softdep_get_depcounts(struct mount *mp, #define D_MKDIR 11 #define D_DIRREM 12 #define D_NEWDIRBLK 13 -#define D_LAST D_NEWDIRBLK +#define D_FREEWORK 14 +#define D_FREEDEP 15 +#define D_JADDREF 16 +#define D_JREMREF 17 +#define D_JNEWBLK 18 +#define D_JFREEBLK 19 +#define D_JFREEFRAG 20 +#define D_JSEG 21 +#define D_JSEGDEP 22 +#define D_LAST D_JSEGDEP +unsigned long dep_current[D_LAST + 1]; +unsigned long dep_total[D_LAST + 1]; + + +SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0, "soft updates stats"); +SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0, + "total dependencies 
allocated"); +SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0, + "current dependencies allocated"); + +#define SOFTDEP_TYPE(type, str, long) \ + static MALLOC_DEFINE(M_ ## type, #str, long); \ + SYSCTL_LONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD, \ + &dep_total[D_ ## type], 0, ""); \ + SYSCTL_LONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD, \ + &dep_current[D_ ## type], 0, ""); + +SOFTDEP_TYPE(PAGEDEP, pagedep, "File page dependencies"); +SOFTDEP_TYPE(INODEDEP, inodedep, "Inode dependencies"); +SOFTDEP_TYPE(BMSAFEMAP, bmsafemap, + "Block or frag allocated from cyl group map"); +SOFTDEP_TYPE(NEWBLK, newblk, "New block or frag allocation dependency"); +SOFTDEP_TYPE(ALLOCDIRECT, allocdirect, "Block or frag dependency for an inode"); +SOFTDEP_TYPE(INDIRDEP, indirdep, "Indirect block dependencies"); +SOFTDEP_TYPE(ALLOCINDIR, allocindir, "Block dependency for an indirect block"); +SOFTDEP_TYPE(FREEFRAG, freefrag, "Previously used frag for an inode"); +SOFTDEP_TYPE(FREEBLKS, freeblks, "Blocks freed from an inode"); +SOFTDEP_TYPE(FREEFILE, freefile, "Inode deallocated"); +SOFTDEP_TYPE(DIRADD, diradd, "New directory entry"); +SOFTDEP_TYPE(MKDIR, mkdir, "New directory"); +SOFTDEP_TYPE(DIRREM, dirrem, "Directory entry deleted"); +SOFTDEP_TYPE(NEWDIRBLK, newdirblk, "Unclaimed new directory block"); +SOFTDEP_TYPE(FREEWORK, freework, "free an inode block"); +SOFTDEP_TYPE(FREEDEP, freedep, "track a block free"); +SOFTDEP_TYPE(JADDREF, jaddref, "Journal inode ref add"); +SOFTDEP_TYPE(JREMREF, jremref, "Journal inode ref remove"); +SOFTDEP_TYPE(JNEWBLK, jnewblk, "Journal new block"); +SOFTDEP_TYPE(JFREEBLK, jfreeblk, "Journal free block"); +SOFTDEP_TYPE(JFREEFRAG, jfreefrag, "Journal free frag"); +SOFTDEP_TYPE(JSEG, jseg, "Journal segment"); +SOFTDEP_TYPE(JSEGDEP, jsegdep, "Journal segment complete"); + +static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes"); +static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations"); + /* * translate 
from workitem type to memory type * MUST match the defines above, such that memtype[D_XXX] == M_XXX @@ -447,8 +486,8 @@ softdep_get_depcounts(struct mount *mp, static struct malloc_type *memtype[] = { M_PAGEDEP, M_INODEDEP, + M_BMSAFEMAP, M_NEWBLK, - M_BMSAFEMAP, M_ALLOCDIRECT, M_INDIRDEP, M_ALLOCINDIR, @@ -458,7 +497,16 @@ static struct malloc_type *memtype[] = { M_DIRADD, M_MKDIR, M_DIRREM, - M_NEWDIRBLK + M_NEWDIRBLK, + M_FREEWORK, + M_FREEDEP, + M_JADDREF, + M_JREMREF, + M_JNEWBLK, + M_JFREEBLK, + M_JFREEFRAG, + M_JSEG, + M_JSEGDEP }; #define DtoM(type) (memtype[type]) @@ -467,17 +515,21 @@ static struct malloc_type *memtype[] = { * Names of malloc types. */ #define TYPENAME(type) \ - ((unsigned)(type) < D_LAST ? memtype[type]->ks_shortdesc : "???") + ((unsigned)(type) <= D_LAST ? memtype[type]->ks_shortdesc : "???") /* * End system adaptation definitions. */ +#define DOTDOT_OFFSET offsetof(struct dirtemplate, dotdot_ino) +#define DOT_OFFSET offsetof(struct dirtemplate, dot_ino) + /* * Forward declarations. */ struct inodedep_hashhead; struct newblk_hashhead; struct pagedep_hashhead; +struct bmsafemap_hashhead; /* * Internal function prototypes. 
@@ -493,53 +545,123 @@ static int flush_inodedep_deps(struct mount *, ino static int flush_deplist(struct allocdirectlst *, int, int *); static int handle_written_filepage(struct pagedep *, struct buf *); static void diradd_inode_written(struct diradd *, struct inodedep *); +static int handle_written_indirdep(struct indirdep *, struct buf *, + struct buf**); static int handle_written_inodeblock(struct inodedep *, struct buf *); -static void handle_allocdirect_partdone(struct allocdirect *); +static int handle_written_bmsafemap(struct bmsafemap *, struct buf *); +static void handle_written_jaddref(struct jaddref *, struct jsegdep *); +static void handle_written_jremref(struct jremref *, struct jsegdep *); +static void handle_written_jseg(struct jseg *, struct buf *); +static void handle_written_jnewblk(struct jnewblk *, struct jsegdep *); +static void handle_written_jfreeblk(struct jfreeblk *, struct jsegdep *); +static void handle_written_jfreefrag(struct jfreefrag *, struct jsegdep *); +static void jseg_write(struct jblocks *, struct jseg *, uint8_t *); +static void jaddref_write(struct jaddref *, uint8_t *); +static void jremref_write(struct jremref *, uint8_t *); +static void jnewblk_write(struct jnewblk *, uint8_t *); +static void jfreeblk_write(struct jfreeblk *, uint8_t *); +static void jfreefrag_write(struct jfreefrag *, uint8_t *); +static void handle_allocdirect_partdone(struct allocdirect *, + struct workhead *); +static void cancel_newblk(struct newblk *, struct workhead *, short type); +static void handle_workitem_indirdep(struct indirdep *); +static void indirdep_complete(struct indirdep *); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); +static void initiate_write_indirdep(struct indirdep*, struct buf *); static void handle_written_mkdir(struct mkdir *, int); +static void initiate_write_bmsafemap(struct bmsafemap *, struct buf *); static void 
initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *); static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *); static void handle_workitem_freefile(struct freefile *); static void handle_workitem_remove(struct dirrem *, struct vnode *); static struct dirrem *newdirrem(struct buf *, struct inode *, struct inode *, int, struct dirrem **); -static void free_diradd(struct diradd *); -static void free_allocindir(struct allocindir *, struct inodedep *); +static void free_indirdep(struct indirdep *); +static void free_diradd(struct diradd *, struct workhead *); +static void merge_diradd(struct inodedep *, struct diradd *); +static void complete_diradd(struct diradd *); +static struct diradd *diradd_lookup(struct pagedep *, int); +static void cancel_diradd_dotdot(struct inode *, struct dirrem *); +static void cancel_allocindir(struct allocindir *, struct inodedep *, + struct freeblks *); +static void complete_mkdir(struct mkdir *); static void free_newdirblk(struct newdirblk *); -static int indir_trunc(struct freeblks *, ufs2_daddr_t, int, ufs_lbn_t, - ufs2_daddr_t *); -static void deallocate_dependencies(struct buf *, struct inodedep *); -static void free_allocdirect(struct allocdirectlst *, - struct allocdirect *, int); +static void free_jremref(struct jremref *); +static void free_jaddref(struct jaddref *); +static void free_jsegdep(struct jsegdep *); +static void free_jseg(struct jseg *); +static void free_jnewblk(struct jnewblk *); +static void free_jfreeblk(struct jfreeblk *); +static void free_jfreefrag(struct jfreefrag *); +static void free_freedep(struct freedep *); +static void cancel_jnewblk(struct jnewblk *, struct workhead *); +static void cancel_jaddref(struct jaddref *, struct inodedep *, + struct workhead *); +static void cancel_jfreefrag(struct jfreefrag *); +static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t); +static int deallocate_dependencies(struct buf *, struct inodedep *, + struct freeblks *); +static 
void free_newblk(struct newblk *); +static void cancel_allocdirect(struct allocdirectlst *, + struct allocdirect *, struct freeblks *, int); static int check_inode_unwritten(struct inodedep *); static int free_inodedep(struct inodedep *); +static void freework_freeblock(struct freework *); static void handle_workitem_freeblocks(struct freeblks *, int); +static void handle_complete_freeblocks(struct freeblks *); +static void handle_workitem_indirblk(struct freework *); +static void handle_written_freework(struct freework *); static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *); static void setup_allocindir_phase2(struct buf *, struct inode *, - struct allocindir *); + struct inodedep *, struct allocindir *, ufs_lbn_t); static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t, - ufs2_daddr_t); + ufs2_daddr_t, ufs_lbn_t); static void handle_workitem_freefrag(struct freefrag *); -static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long); +static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long, + ufs_lbn_t); static void allocdirect_merge(struct allocdirectlst *, struct allocdirect *, struct allocdirect *); -static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *); -static int newblk_find(struct newblk_hashhead *, struct fs *, ufs2_daddr_t, - struct newblk **); -static int newblk_lookup(struct fs *, ufs2_daddr_t, int, struct newblk **); +static struct freefrag *allocindir_merge(struct allocindir *, + struct allocindir *); +static int bmsafemap_find(struct bmsafemap_hashhead *, struct mount *, int, + struct bmsafemap **); +static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *, + int cg); +static int newblk_find(struct newblk_hashhead *, struct mount *, ufs2_daddr_t, + int, struct newblk **); +static int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **); static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t, struct inodedep **); static 
int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **); -static int pagedep_lookup(struct inode *, ufs_lbn_t, int, struct pagedep **); +static int pagedep_lookup(struct mount *, ino_t, ufs_lbn_t, int, + struct pagedep **); static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t, struct mount *mp, int, struct pagedep **); static void pause_timer(void *); static int request_cleanup(struct mount *, int); static int process_worklist_item(struct mount *, int); +static void jwork_move(short, short, struct workhead *, struct workhead *); static void add_to_worklist(struct worklist *); static void softdep_flush(void); static int softdep_speedup(void); +static int journal_mount(struct mount *, struct fs *, struct ucred *); +static void add_to_journal(struct worklist *); +static void remove_from_journal(struct worklist *); +static void softdep_process_journal(struct mount *, int); +static struct jremref *newjremref(struct dirrem *, struct inode *, + struct inode *ip, off_t); +static struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t, + uint16_t); +static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t, + ufs2_daddr_t, int); +static struct freework *newfreework(struct freeblks *, struct freework *, + ufs_lbn_t, ufs2_daddr_t, int, int); +static void jwait(struct worklist *wk); +static struct inodedep *inodedep_lookup_ip(struct inode *); +static int bmsafemap_rollbacks(struct bmsafemap *); +static struct freefile *handle_bufwait(struct inodedep *, struct workhead *); +static void handle_jwork(struct workhead *); /* * Exported softdep operations. 
@@ -572,40 +694,134 @@ MTX_SYSINIT(softdep_lock, &lk, "Softdep Lock", MTX (item)->wk_state &= ~ONWORKLIST; \ LIST_REMOVE(item, wk_list); \ } while (0) +#define WORKLIST_INSERT_UNLOCKED WORKLIST_INSERT +#define WORKLIST_REMOVE_UNLOCKED WORKLIST_REMOVE + #else /* DEBUG */ -static void worklist_insert(struct workhead *, struct worklist *); -static void worklist_remove(struct worklist *); +static void worklist_insert(struct workhead *, struct worklist *, int); +static void worklist_remove(struct worklist *, int); -#define WORKLIST_INSERT(head, item) worklist_insert(head, item) -#define WORKLIST_REMOVE(item) worklist_remove(item) +#define WORKLIST_INSERT(head, item) worklist_insert(head, item, 1) +#define WORKLIST_INSERT_UNLOCKED(head, item) worklist_insert(head, item, 0) +#define WORKLIST_REMOVE(item) worklist_remove(item, 1) +#define WORKLIST_REMOVE_UNLOCKED(item) worklist_remove(item, 0) static void -worklist_insert(head, item) +worklist_insert(head, item, locked) struct workhead *head; struct worklist *item; + int locked; { - mtx_assert(&lk, MA_OWNED); + if (locked) + mtx_assert(&lk, MA_OWNED); if (item->wk_state & ONWORKLIST) - panic("worklist_insert: already on list"); + panic("worklist_insert: %p %s(0x%X) already on list", + item, TYPENAME(item->wk_type), item->wk_state); item->wk_state |= ONWORKLIST; LIST_INSERT_HEAD(head, item, wk_list); } static void -worklist_remove(item) +worklist_remove(item, locked) struct worklist *item; + int locked; { - mtx_assert(&lk, MA_OWNED); + if (locked) + mtx_assert(&lk, MA_OWNED); if ((item->wk_state & ONWORKLIST) == 0) - panic("worklist_remove: not on list"); + panic("worklist_remove: %p %s(0x%X) not on list", + item, TYPENAME(item->wk_type), item->wk_state); item->wk_state &= ~ONWORKLIST; LIST_REMOVE(item, wk_list); } #endif /* DEBUG */ /* + * Merge two jsegdeps keeping only the oldest one as newer references + * can't be discarded until after older references. 
+ */ +static inline struct jsegdep * +jsegdep_merge(struct jsegdep *one, struct jsegdep *two) +{ + struct jsegdep *swp; + + if (two == NULL) + return (one); + + if (one->jd_seg->js_seq > two->jd_seg->js_seq) { + swp = one; + one = two; + two = swp; + } + WORKLIST_REMOVE(&two->jd_list); + free_jsegdep(two); + + return (one); +} + +/* + * If two freedeps are compatible free one to reduce list size. + */ +static inline struct freedep * +freedep_merge(struct freedep *one, struct freedep *two) +{ + if (two == NULL) + return (one); + + if (one->fd_freework == two->fd_freework) { + WORKLIST_REMOVE(&two->fd_list); + free_freedep(two); + } + return (one); +} + +/* + * Move journal work from one list to another. Duplicate freedeps and + * jsegdeps are coalesced to keep the lists as small as possible. + */ +static void +jwork_move(type, line, dst, src) + short type; + short line; + struct workhead *dst; + struct workhead *src; +{ + struct freedep *freedep; + struct jsegdep *jsegdep; + struct worklist *wkn; + struct worklist *wk; + + KASSERT(dst != src, + ("jwork_move: dst == src")); + freedep = NULL; + jsegdep = NULL; + LIST_FOREACH_SAFE(wk, dst, wk_list, wkn) { + if (wk->wk_type == D_JSEGDEP) + jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep); + if (wk->wk_type == D_FREEDEP) + freedep = freedep_merge(WK_FREEDEP(wk), freedep); + } + + mtx_assert(&lk, MA_OWNED); + while ((wk = LIST_FIRST(src)) != NULL) { + WORKLIST_REMOVE(wk); + WORKLIST_INSERT(dst, wk); + if (wk->wk_type == D_JSEGDEP) { + jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep); +#ifdef DEBUG + jsegdep->jd_type = type; + jsegdep->jd_line = line; +#endif + continue; + } + if (wk->wk_type == D_FREEDEP) + freedep = freedep_merge(WK_FREEDEP(wk), freedep); + } +} + +/* * Routines for tracking and managing workitems. 
*/ static void workitem_free(struct worklist *, int); @@ -623,13 +839,16 @@ workitem_free(item, type) #ifdef DEBUG if (item->wk_state & ONWORKLIST) - panic("workitem_free: still on list"); + panic("workitem_free: %s(0x%X) still on list", + TYPENAME(item->wk_type), item->wk_state); if (item->wk_type != type) - panic("workitem_free: type mismatch"); + panic("workitem_free: type mismatch %s != %s", + TYPENAME(item->wk_type), TYPENAME(type)); #endif ump = VFSTOUFS(item->wk_mp); if (--ump->softdep_deps == 0 && ump->softdep_req) wakeup(&ump->softdep_deps); + dep_current[type]--; free(item, DtoM(type)); } @@ -643,6 +862,8 @@ workitem_alloc(item, type, mp) item->wk_mp = mp; item->wk_state = 0; ACQUIRE_LOCK(&lk); + dep_current[type]++; + dep_total[type]++; VFSTOUFS(mp)->softdep_deps++; VFSTOUFS(mp)->softdep_accdeps++; FREE_LOCK(&lk); @@ -696,6 +917,9 @@ SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW SYSCTL_DECL(_vfs_ffs); +LIST_HEAD(bmsafemap_hashhead, bmsafemap) *bmsafemap_hashtbl; +static u_long bmsafemap_hash; /* size of hash table - 1 */ + static int compute_summary_at_mount = 0; /* Whether to recompute the summary at mount time */ SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW, &compute_summary_at_mount, 0, "Recompute summary at mount"); @@ -779,7 +1003,7 @@ softdep_speedup(void) req_pending = 1; wakeup(&req_pending); } - + /* XXX Don't we really want to speedup the buf daemon? 
*/ return speedup_syncer(); } @@ -799,7 +1023,8 @@ add_to_worklist(wk) mtx_assert(&lk, MA_OWNED); ump = VFSTOUFS(wk->wk_mp); if (wk->wk_state & ONWORKLIST) - panic("add_to_worklist: already on list"); + panic("add_to_worklist: %s(0x%X) already on list", + TYPENAME(wk->wk_type), wk->wk_state); wk->wk_state |= ONWORKLIST; if (LIST_EMPTY(&ump->softdep_workitem_pending)) LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list); @@ -838,6 +1063,7 @@ softdep_process_worklist(mp, full) ACQUIRE_LOCK(&lk); loopcount = 1; starttime = time_second; + softdep_process_journal(mp, 0); while (ump->softdep_on_worklist > 0) { if ((cnt = process_worklist_item(mp, 0)) == -1) break; @@ -969,6 +1195,15 @@ process_worklist_item(mp, flags) handle_workitem_freefile(WK_FREEFILE(wk)); break; + case D_FREEWORK: + /* Final block in an indirect was freed. */ + handle_workitem_indirblk(WK_FREEWORK(wk)); + break; + + case D_INDIRDEP: + handle_workitem_indirdep(WK_INDIRDEP(wk)); + break; + default: panic("%s_process_worklist: Unknown type %s", "softdep", TYPENAME(wk->wk_type)); @@ -982,19 +1217,22 @@ process_worklist_item(mp, flags) /* * Move dependencies from one buffer to another. */ -void +int softdep_move_dependencies(oldbp, newbp) struct buf *oldbp; struct buf *newbp; { struct worklist *wk, *wktail; + int dirty; - if (!LIST_EMPTY(&newbp->b_dep)) - panic("softdep_move_dependencies: need merge code"); - wktail = 0; + dirty = 0; + wktail = NULL; ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) { LIST_REMOVE(wk, wk_list); + if (wk->wk_type == D_BMSAFEMAP && + bmsafemap_rollbacks(WK_BMSAFEMAP(wk))) + dirty = 1; if (wktail == 0) LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list); else @@ -1002,6 +1240,8 @@ softdep_move_dependencies(oldbp, newbp) wktail = wk; } FREE_LOCK(&lk); + + return (dirty); } /* @@ -1198,23 +1438,22 @@ pagedep_find(pagedephd, ino, lbn, mp, flags, paged * This routine must be called with splbio interrupts blocked. 
*/ static int -pagedep_lookup(ip, lbn, flags, pagedeppp) - struct inode *ip; +pagedep_lookup(mp, ino, lbn, flags, pagedeppp) + struct mount *mp; + ino_t ino; ufs_lbn_t lbn; int flags; struct pagedep **pagedeppp; { struct pagedep *pagedep; struct pagedep_hashhead *pagedephd; - struct mount *mp; int ret; int i; mtx_assert(&lk, MA_OWNED); - mp = ITOV(ip)->v_mount; - pagedephd = PAGEDEP_HASH(mp, ip->i_number, lbn); + pagedephd = PAGEDEP_HASH(mp, ino, lbn); - ret = pagedep_find(pagedephd, ip->i_number, lbn, mp, flags, pagedeppp); + ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp); if (*pagedeppp || (flags & DEPALLOC) == 0) return (ret); FREE_LOCK(&lk); @@ -1222,12 +1461,12 @@ static int M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp); ACQUIRE_LOCK(&lk); - ret = pagedep_find(pagedephd, ip->i_number, lbn, mp, flags, pagedeppp); + ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp); if (*pagedeppp) { WORKITEM_FREE(pagedep, D_PAGEDEP); return (ret); } - pagedep->pd_ino = ip->i_number; + pagedep->pd_ino = ino; pagedep->pd_lbn = lbn; LIST_INIT(&pagedep->pd_dirremhd); LIST_INIT(&pagedep->pd_pendinghd); @@ -1314,10 +1553,12 @@ inodedep_lookup(mp, inum, flags, inodedeppp) inodedep->id_savedino1 = NULL; inodedep->id_savedsize = -1; inodedep->id_savedextsize = -1; - inodedep->id_buf = NULL; + inodedep->id_bmsafemap = NULL; + inodedep->id_mkdiradd = NULL; LIST_INIT(&inodedep->id_pendinghd); LIST_INIT(&inodedep->id_inowait); LIST_INIT(&inodedep->id_bufwait); + LIST_INIT(&inodedep->id_jaddrefhd); TAILQ_INIT(&inodedep->id_inoupdt); TAILQ_INIT(&inodedep->id_newinoupdt); TAILQ_INIT(&inodedep->id_extupdt); @@ -1336,17 +1577,29 @@ u_long newblk_hash; /* size of hash table - 1 */ (&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash]) static int -newblk_find(newblkhd, fs, newblkno, newblkpp) +newblk_find(newblkhd, mp, newblkno, flags, newblkpp) struct newblk_hashhead *newblkhd; - struct fs *fs; + struct mount *mp; 
ufs2_daddr_t newblkno; + int flags; struct newblk **newblkpp; { struct newblk *newblk; - LIST_FOREACH(newblk, newblkhd, nb_hash) - if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs) - break; + LIST_FOREACH(newblk, newblkhd, nb_hash) { + if (newblkno != newblk->nb_newblkno) + continue; + if (mp != newblk->nb_list.wk_mp) + continue; + /* + * If we're creating a new dependency don't match those that + * have already been converted to allocdirects. This is for + * a frag extend. + */ + if ((flags & DEPALLOC) && newblk->nb_list.wk_type != D_NEWBLK) + continue; + break; + } if (newblk) { *newblkpp = newblk; return (1); @@ -1361,8 +1614,8 @@ static int * Found or allocated entry is returned in newblkpp. */ static int -newblk_lookup(fs, newblkno, flags, newblkpp) - struct fs *fs; +newblk_lookup(mp, newblkno, flags, newblkpp) + struct mount *mp; ufs2_daddr_t newblkno; int flags; struct newblk **newblkpp; @@ -1370,21 +1623,25 @@ static int struct newblk *newblk; struct newblk_hashhead *newblkhd; - newblkhd = NEWBLK_HASH(fs, newblkno); - if (newblk_find(newblkhd, fs, newblkno, newblkpp)) + newblkhd = NEWBLK_HASH(VFSTOUFS(mp)->um_fs, newblkno); + if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp)) return (1); if ((flags & DEPALLOC) == 0) return (0); FREE_LOCK(&lk); - newblk = malloc(sizeof(struct newblk), - M_NEWBLK, M_SOFTDEP_FLAGS); + newblk = malloc(sizeof(union allblk), M_NEWBLK, + M_SOFTDEP_FLAGS | M_ZERO); + workitem_alloc(&newblk->nb_list, D_NEWBLK, mp); ACQUIRE_LOCK(&lk); - if (newblk_find(newblkhd, fs, newblkno, newblkpp)) { - free(newblk, M_NEWBLK); + if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp)) { + WORKITEM_FREE(newblk, D_NEWBLK); return (1); } - newblk->nb_state = 0; - newblk->nb_fs = fs; + newblk->nb_freefrag = NULL; + LIST_INIT(&newblk->nb_indirdeps); + LIST_INIT(&newblk->nb_newdirblk); + LIST_INIT(&newblk->nb_jwork); + newblk->nb_state = ATTACHED; newblk->nb_newblkno = newblkno; LIST_INSERT_HEAD(newblkhd, newblk, nb_hash); *newblkpp = 
newblk; @@ -1401,10 +1658,10 @@ softdep_initialize() LIST_INIT(&mkdirlisthd); max_softdeps = desiredvnodes * 4; - pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, - &pagedep_hash); + pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, &pagedep_hash); inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash); - newblk_hashtbl = hashinit(64, M_NEWBLK, &newblk_hash); + newblk_hashtbl = hashinit(desiredvnodes / 5, M_NEWBLK, &newblk_hash); + bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, &bmsafemap_hash); /* initialise bioops hack */ bioops.io_start = softdep_disk_io_initiation; @@ -1428,6 +1685,7 @@ softdep_uninitialize() hashdestroy(pagedep_hashtbl, M_PAGEDEP, pagedep_hash); hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash); hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash); + hashdestroy(bmsafemap_hashtbl, M_BMSAFEMAP, bmsafemap_hash); } /* @@ -1457,9 +1715,14 @@ softdep_mount(devvp, mp, fs, cred) MNT_IUNLOCK(mp); ump = VFSTOUFS(mp); LIST_INIT(&ump->softdep_workitem_pending); + LIST_INIT(&ump->softdep_journal_pending); ump->softdep_worklist_tail = NULL; ump->softdep_on_worklist = 0; ump->softdep_deps = 0; + if ((error = journal_mount(mp, fs, cred)) != 0) { + printf("Failed to start journal: %d\n", error); + return (error); + } /* * When doing soft updates, the counters in the * superblock may have gotten out of sync. Recomputation @@ -1493,7 +1756,1481 @@ softdep_mount(devvp, mp, fs, cred) return (0); } +struct jblocks { + struct jseglst jb_segs; /* TAILQ of current segments. */ + struct jextent *jb_extent; /* Extent array. */ + uint64_t jb_nextseq; /* Next sequence number. */ + uint64_t jb_oldestseq; /* Oldest active sequence number. */ + int jb_avail; /* Available extents. */ + int jb_used; /* Last used extent. */ + int jb_head; /* Allocator head. */ + int jb_off; /* Allocator extent offset. */ + int jb_blocks; /* Total disk blocks covered. */ + int jb_free; /* Total disk blocks free. 
*/ +}; +struct jextent { + ufs2_daddr_t je_daddr; /* Disk block address. */ + int je_blocks; /* Disk block count. */ +}; + +static struct jblocks * +jblocks_create(void) +{ + struct jblocks *jblocks; + + jblocks = malloc(sizeof(*jblocks), M_JBLOCKS, M_WAITOK | M_ZERO); + TAILQ_INIT(&jblocks->jb_segs); + jblocks->jb_avail = 10; + jblocks->jb_extent = malloc(sizeof(struct jextent) * jblocks->jb_avail, + M_JBLOCKS, M_WAITOK | M_ZERO); + + return (jblocks); +} + +static ufs2_daddr_t +jblocks_alloc(struct jblocks *jblocks, int bytes, int *actual) +{ + ufs2_daddr_t daddr; + struct jextent *jext; + int freecnt; + int blocks; + + blocks = bytes / DEV_BSIZE; + jext = &jblocks->jb_extent[jblocks->jb_head]; + freecnt = jext->je_blocks - jblocks->jb_off; + if (freecnt == 0) { + jblocks->jb_off = 0; + if (++jblocks->jb_head > jblocks->jb_used) + jblocks->jb_head = 0; + jext = &jblocks->jb_extent[jblocks->jb_head]; + freecnt = jext->je_blocks; + } + if (freecnt > blocks) + freecnt = blocks; + *actual = freecnt * DEV_BSIZE; + daddr = jext->je_daddr + jblocks->jb_off; + jblocks->jb_off += freecnt; + jblocks->jb_free -= freecnt; + + return (daddr); +} + +static void +jblocks_free(struct jblocks *jblocks, int bytes) +{ + + if (jblocks->jb_free == 0) + wakeup(jblocks); + jblocks->jb_free += bytes / DEV_BSIZE; +} + +static void +jblocks_destroy(struct jblocks *jblocks) +{ + + if (jblocks->jb_extent) + free(jblocks->jb_extent, M_JBLOCKS); + free(jblocks, M_JBLOCKS); +} + +static void +jblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks) +{ + struct jextent *jext; + + jblocks->jb_blocks += blocks; + jblocks->jb_free += blocks; + jext = &jblocks->jb_extent[jblocks->jb_used]; + /* Adding the first block. */ + if (jext->je_daddr == 0) { + jext->je_daddr = daddr; + jext->je_blocks = blocks; + return; + } + /* Extending the last extent. */ + if (jext->je_daddr + jext->je_blocks == daddr) { + jext->je_blocks += blocks; + return; + } + /* Adding a new extent. 
*/ + if (++jblocks->jb_used == jblocks->jb_avail) { + jblocks->jb_avail *= 2; + jext = malloc(sizeof(struct jextent) * jblocks->jb_avail, + M_JBLOCKS, M_WAITOK | M_ZERO); + memcpy(jext, jblocks->jb_extent, + sizeof(struct jextent) * jblocks->jb_used); + free(jblocks->jb_extent, M_JBLOCKS); + jblocks->jb_extent = jext; + } + jext = &jblocks->jb_extent[jblocks->jb_used]; + jext->je_daddr = daddr; + jext->je_blocks = blocks; + return; +} + +static int +journal_mount(mp, fs, cred) + struct mount *mp; + struct fs *fs; + struct ucred *cred; +{ + struct jblocks *jblocks; + struct vnode *vp; + struct inode *ip; + ufs2_daddr_t blkno; + int bcount; + int error; + int i; + + error = VFS_VGET(mp, fs->fs_sujournal, LK_EXCLUSIVE, &vp); + if (error) + return (error); + ip = VTOI(vp); + if (ip->i_size < 1 * 1024 * 1024) { + error = ENOSPC; + goto out; + } + bcount = lblkno(fs, ip->i_size); /* Only use whole blocks. */ + jblocks = jblocks_create(); + for (i = 0; i < bcount; i++) { + error = ufs_bmaparray(vp, i, &blkno, NULL, NULL, NULL); + if (error) + break; + jblocks_add(jblocks, blkno, fsbtodb(fs, fs->fs_frag)); + } + if (error) + jblocks_destroy(jblocks); + else + VFSTOUFS(mp)->softdep_jblocks = jblocks; +out: + vput(vp); + return (error); +} + /* + * Called when a journal record is ready to be written. Space is allocated + * and the journal entry is created when the journal is flushed to stable + * store. 
+ */ +static void +add_to_journal(wk) + struct worklist *wk; +{ + struct ufsmount *ump; + + mtx_assert(&lk, MA_OWNED); + if ((wk->wk_state & DEPCOMPLETE) == 0) + panic("add_to_journal: Invalid wk state 0x%X", wk->wk_state); + ump = VFSTOUFS(wk->wk_mp); + if (wk->wk_state & ONWORKLIST) + panic("add_to_journal: %s(0x%X) already on list", + TYPENAME(wk->wk_type), wk->wk_state); + wk->wk_state |= ONWORKLIST; + if (LIST_EMPTY(&ump->softdep_journal_pending)) + LIST_INSERT_HEAD(&ump->softdep_journal_pending, wk, wk_list); + else + LIST_INSERT_AFTER(ump->softdep_journal_tail, wk, wk_list); + ump->softdep_journal_tail = wk; + ump->softdep_on_journal += 1; +} + +/* + * Remove an arbitrary item from the journal worklist and maintain the tail + * pointer. This happens when a new operation obviates the need to + * journal an old operation. + */ +static void +remove_from_journal(wk) + struct worklist *wk; +{ + struct ufsmount *ump; + + mtx_assert(&lk, MA_OWNED); + ump = VFSTOUFS(wk->wk_mp); +#ifdef DEBUG /* XXX Expensive, temporary. */ + { + struct worklist *wkn; + + LIST_FOREACH(wkn, &ump->softdep_journal_pending, wk_list) + if (wkn == wk) + break; + if (wkn == NULL) + panic("remove_from_journal: %p is not in journal", wk); + } +#endif + /* + * We emulate a TAILQ to save space in most structures which do not + * require TAILQ semantics. Here we must update the tail position + * when removing the tail which is not the final entry. 
+ */ + if (ump->softdep_journal_tail == wk) + ump->softdep_journal_tail = + (struct worklist *)wk->wk_list.le_prev; + + WORKLIST_REMOVE(wk); + ump->softdep_on_journal -= 1; +} + +#if 0 +#ifdef DDB +DB_SHOW_COMMAND(softdepsegs, db_show_softdepsegs) +{ + struct jsegdep *jsegdep; + struct jseg *jseg; + int i; + + TAILQ_FOREACH(jseg, &allsegs, js_next) { + db_printf("%p (%ld): %d refs\n", + jseg, jseg->js_seq, jseg->js_refs); + if (jseg->js_refs == 0) + continue; + for (i = 0; i < jseg->js_cnt; i++) { + jsegdep = &jseg->js_deps[i]; + if ((jsegdep->jd_state & ATTACHED) == 0) + continue; + db_printf("\tseg %p allocated to %s @ %d\n", + jsegdep, TYPENAME(jsegdep->jd_type), + jsegdep->jd_line); + } + } +} + +#endif +#endif + +static void +jseg_write(jblocks, jseg, data) + struct jblocks *jblocks; + struct jseg *jseg; + uint8_t *data; +{ + struct jsegrec *rec; + + rec = (struct jsegrec *)data; + rec->jsr_seq = jseg->js_seq; + rec->jsr_oldest = jblocks->jb_oldestseq; + rec->jsr_cnt = jseg->js_cnt; + rec->jsr_crc = 0; +} + +static void +jaddref_write(jaddref, data) + struct jaddref *jaddref; + uint8_t *data; +{ + struct jrefrec *rec; + + rec = (struct jrefrec *)data; + rec->jr_op = JOP_ADDREF; + rec->jr_ino = jaddref->ja_ino; + rec->jr_parent = jaddref->ja_parent; + rec->jr_nlink = jaddref->ja_nlink; + rec->jr_mode = jaddref->ja_mode; + rec->jr_diroff = jaddref->ja_diroff; +} + +static void +jremref_write(jremref, data) + struct jremref *jremref; + uint8_t *data; +{ + struct jrefrec *rec; + + rec = (struct jrefrec *)data; + rec->jr_op = JOP_REMREF; + rec->jr_ino = jremref->jr_ino; + rec->jr_parent = jremref->jr_parent; + rec->jr_nlink = jremref->jr_nlink; + rec->jr_mode = jremref->jr_mode; + rec->jr_diroff = jremref->jr_diroff; +} + +static void +jnewblk_write(jnewblk, data) + struct jnewblk *jnewblk; + uint8_t *data; +{ + struct jblkrec *rec; + + rec = (struct jblkrec *)data; + rec->jb_op = JOP_NEWBLK; + rec->jb_ino = jnewblk->jn_ino; + rec->jb_blkno = jnewblk->jn_blkno; + 
rec->jb_lbn = jnewblk->jn_lbn; + rec->jb_frags = jnewblk->jn_frags; + rec->jb_oldfrags = jnewblk->jn_oldfrags; +} + +static void +jfreeblk_write(jfreeblk, data) + struct jfreeblk *jfreeblk; + uint8_t *data; +{ + struct jblkrec *rec; + + rec = (struct jblkrec *)data; + rec->jb_op = JOP_FREEBLK; + rec->jb_ino = jfreeblk->jf_ino; + rec->jb_blkno = jfreeblk->jf_blkno; + rec->jb_lbn = jfreeblk->jf_lbn; + rec->jb_frags = jfreeblk->jf_frags; + rec->jb_oldfrags = 0; +} + +static void +jfreefrag_write(jfreefrag, data) + struct jfreefrag *jfreefrag; + uint8_t *data; +{ + struct jblkrec *rec; + + rec = (struct jblkrec *)data; + rec->jb_op = JOP_FREEBLK; + rec->jb_ino = jfreefrag->fr_ino; + rec->jb_blkno = jfreefrag->fr_blkno; + rec->jb_lbn = jfreefrag->fr_lbn; + rec->jb_frags = jfreefrag->fr_frags; + rec->jb_oldfrags = 0; +} + +/* + * Flush some journal records to disk. + */ +static void +softdep_process_journal(mp, flags) + struct mount *mp; + int flags; +{ + struct jblocks *jblocks; + struct ufsmount *ump; + struct worklist *wk; + struct jseg *jseg; + struct buf *bp; + uint8_t *data; + struct fs *fs; + int segwritten; + int jrecpb; /* records per fs block. */ + int jrecpf; /* records per fs frag. */ + int size; + int cnt; + int i; + + ump = VFSTOUFS(mp); + fs = ump->um_fs; + jblocks = ump->softdep_jblocks; + jrecpb = fs->fs_bsize / JREC_SIZE; + jrecpf = fs->fs_fsize / JREC_SIZE; + segwritten = 0; + while ((cnt = ump->softdep_on_journal) != 0) { + /* + * Create a new segment to hold as many as 'cnt' journal + * entries and add them to the segment. Notice cnt is + * off by one to account for the space required by the + * jsegrec. If we don't have a full block to log skip it + * unless flags == MNT_WAIT and we haven't written anything. + */ + cnt++; + if (cnt < jrecpb && (flags != MNT_WAIT || segwritten != 0)) + return; + if (cnt < jrecpb) + cnt = roundup2(cnt, jrecpf); + else + cnt = jrecpb; + /* + * If there is no space try to clean up some entries. 
+ */ + while (jblocks->jb_free == 0) { + if (flags != MNT_WAIT) + return; + /* + * Items in the worklist should release journal + * space when they free storage. Process them + * until we have enough space to proceed. If + * we can't process an item we need to flush more + * bufs and wait. + */ +#if 0 /* + * XXX Currently this deadlocks because bufs are + * held locked while waiting on journal writes in + * truncate and softdep_update_inodeblock(). + */ + printf("softdep: Out of journal space!\n"); + if (process_worklist_item(mp, LK_NOWAIT) != -1) + continue; +#endif + printf("softdep: Out of journal space, no progress."); + softdep_speedup(); + msleep(jblocks, &lk, PRIBIO, "jblocks", 0); + } + FREE_LOCK(&lk); + cnt--; /* Temporarily forget the segment record. */ + size = sizeof(*jseg) + (sizeof(struct jsegdep) * cnt); + jseg = malloc(size, M_JSEG, M_SOFTDEP_FLAGS); + workitem_alloc(&jseg->js_list, D_JSEG, mp); + LIST_INIT(&jseg->js_entries); + jseg->js_refs = 1; /* Self reference. */ + jseg->js_jblocks = jblocks; + for (i = 0; i < cnt; i++) { + jseg->js_deps[i].jd_list.wk_mp = mp; + jseg->js_deps[i].jd_list.wk_type = D_JSEGDEP; + jseg->js_deps[i].jd_list.wk_state = 0; + jseg->js_deps[i].jd_seg = jseg; + } + size = fragroundup(fs, (cnt+1) * JREC_SIZE); + bp = geteblk(size, 0); + ACQUIRE_LOCK(&lk); + /* + * If there was a race while we were allocating the block + * and jseg the entry we care about was likely written. + * We bail out in both the WAIT and NOWAIT case and assume + * the caller will loop if the entry it cares about is + * not written. + */ + if (ump->softdep_on_journal == 0 || jblocks->jb_free == 0) { + bp->b_flags |= B_INVAL | B_NOCACHE; + WORKITEM_FREE(jseg, D_JSEG); + FREE_LOCK(&lk); + brelse(bp); + ACQUIRE_LOCK(&lk); + return; + } + /* + * XXX If we didn't preallocate jsegdeps we could go up to + * the size available in the block. 
+ */ + cnt = MIN(ump->softdep_on_journal, cnt); + size = fragroundup(fs, (cnt + 1) * JREC_SIZE); + /* + * Allocate a disk block for this journal data and account + * for truncation of the requested size if enough contiguous + * space was not available. + */ + bp->b_blkno = bp->b_lblkno = jblocks_alloc(jblocks, size, + &size); + bp->b_offset = bp->b_blkno * DEV_BSIZE; + bp->b_bcount = size; + bp->b_bufobj = &ump->um_devvp->v_bufobj; + bp->b_flags &= ~B_INVAL; + /* + * Initialize our jseg with as many as cnt - 1 records. + * Assign the next sequence number to it and link it + * in-order. + */ + cnt = MIN(ump->softdep_on_journal, (size / JREC_SIZE) - 1); + jseg->js_buf = bp; + jseg->js_cnt = cnt; + jseg->js_size = size; + jseg->js_seq = jblocks->jb_nextseq++; + if (TAILQ_EMPTY(&jblocks->jb_segs)) + jblocks->jb_oldestseq = jseg->js_seq; + TAILQ_INSERT_TAIL(&jblocks->jb_segs, jseg, js_next); + /* + * Start filling in records from the pending list. + */ + data = bp->b_data; + jseg_write(jblocks, jseg, data); + data += JREC_SIZE; + while ((wk = LIST_FIRST(&ump->softdep_journal_pending)) + != NULL) { + remove_from_journal(wk); + wk->wk_state |= IOSTARTED; + WORKLIST_INSERT(&jseg->js_entries, wk); + switch (wk->wk_type) { + case D_JADDREF: + jaddref_write(WK_JADDREF(wk), data); + break; + case D_JREMREF: + jremref_write(WK_JREMREF(wk), data); + break; + case D_JNEWBLK: + jnewblk_write(WK_JNEWBLK(wk), data); + break; + case D_JFREEBLK: + jfreeblk_write(WK_JFREEBLK(wk), data); + break; + case D_JFREEFRAG: + jfreefrag_write(WK_JFREEFRAG(wk), data); + break; + default: + panic("process_journal: Unknown type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + data += JREC_SIZE; + if (--cnt == 0) + break; + } + /* + * Write this one buffer and continue. + */ +#if 1 + WORKLIST_INSERT(&bp->b_dep, &jseg->js_list); + FREE_LOCK(&lk); + BO_LOCK(bp->b_bufobj); + bgetvp(ump->um_devvp, bp); + BO_UNLOCK(bp->b_bufobj); + /* + * XXX bawrite? Completion ordering? 
We may permit writing + * the journal out of order but then we must complete + * those segments in order by delaying handle_written_jseg + * until all predecessors have completed. + */ + bwrite(bp); + ACQUIRE_LOCK(&lk); +#else + handle_written_jseg(jseg, bp); + FREE_LOCK(&lk); + brelse(bp); + ACQUIRE_LOCK(&lk); +#endif + segwritten++; + } +} + +/* + * Complete a jseg write, allowing all dependencies awaiting journal writes + * to proceed. Each journal dependency also attaches a jsegdep to dependent + * structures so that the journal segment can be freed to reclaim space. + */ +static void +handle_written_jseg(jseg, bp) + struct jseg *jseg; + struct buf *bp; +{ + struct worklist *wk; + struct jsegdep *jsegdep; + int waiting; + int i; + + if (jseg->js_refs == 0) + panic("handle_written_jseg: No self-reference on %p", jseg); + /* + * We'll never need this buffer again, set flags so it will be + * discarded. + */ + bp->b_flags |= B_INVAL | B_NOCACHE; + i = 0; + while ((wk = LIST_FIRST(&jseg->js_entries)) != NULL) { + WORKLIST_REMOVE(wk); + waiting = wk->wk_state & IOWAITING; + wk->wk_state &= ~(IOSTARTED | IOWAITING); + wk->wk_state |= COMPLETE; + KASSERT(i < jseg->js_cnt, + ("handle_written_jseg: overflow %d >= %d", + i, jseg->js_cnt)); + jsegdep = &jseg->js_deps[i++]; +#ifdef DEBUG + if (jsegdep->jd_state != 0) + panic("Invalid jsegdep: %p jseg %p", jsegdep, jseg); + jsegdep->jd_state = ATTACHED; + jsegdep->jd_type = wk->wk_type; + jsegdep->jd_line = __LINE__; +#endif + jseg->js_refs++; + switch (wk->wk_type) { + case D_JADDREF: + handle_written_jaddref(WK_JADDREF(wk), jsegdep); + break; + case D_JREMREF: + handle_written_jremref(WK_JREMREF(wk), jsegdep); + break; + case D_JNEWBLK: + handle_written_jnewblk(WK_JNEWBLK(wk), jsegdep); + break; + case D_JFREEBLK: + handle_written_jfreeblk(WK_JFREEBLK(wk), jsegdep); + break; + case D_JFREEFRAG: + handle_written_jfreefrag(WK_JFREEFRAG(wk), jsegdep); + break; + default: + panic("handle_written_jseg: Unknown type %s", + 
TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + if (waiting) + wakeup(wk); + } + /* Reset the cnt for the real number of jsegdeps handed out. */ + jseg->js_cnt = i; + /* Release the self reference so the structure may be freed. */ + free_jseg(jseg); +} + +/* + * Called once a jremref has made it to stable store. The jremref is marked + * complete and we attempt to free it. Any pagedeps writes sleeping waiting + * for the jremref to complete will be awoken by free_jremref. + */ +static void +handle_written_jremref(jremref, jsegdep) + struct jremref *jremref; + struct jsegdep *jsegdep; +{ + struct dirrem *dirrem; + + dirrem = jremref->jr_dirrem; + jremref->jr_dirrem = NULL; + LIST_REMOVE(jremref, jr_deps); + jsegdep->jd_state |= jremref->jr_state & MKDIR_PARENT; + WORKLIST_INSERT(&dirrem->dm_jwork, &jsegdep->jd_list); + if (LIST_EMPTY(&dirrem->dm_jremrefhd) && + (dirrem->dm_state & COMPLETE) != 0) + add_to_worklist(&dirrem->dm_list); + free_jremref(jremref); +} + +/* + * Called once a jaddref has made it to stable store. The dependency is + * marked complete and any dependent structures are added to the inode + * bufwait list to be completed as soon as it is written. If a bitmap + * write depends on this entry we move the inode into the inodedephd + * of the bmsafemap dependency and attempt to remove the jaddref from + * the bmsafemap. 
+ */ +static void +handle_written_jaddref(jaddref, jsegdep) + struct jaddref *jaddref; + struct jsegdep *jsegdep; +{ + struct inodedep *inodedep; + struct diradd *diradd; + struct mkdir *mkdir; + + mkdir = NULL; + diradd = NULL; + if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino, + 0, &inodedep) == 0) + panic("handle_written_jaddref: Lost inodedep."); + if (jaddref->ja_diradd == NULL) + panic("handle_written_jaddref: No dependency"); + if (jaddref->ja_diradd->da_list.wk_type == D_DIRADD) { + diradd = jaddref->ja_diradd; + WORKLIST_INSERT(&inodedep->id_bufwait, &diradd->da_list); + } else if (jaddref->ja_state & MKDIR_PARENT) { + mkdir = jaddref->ja_mkdir; + WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir->md_list); + } else if (jaddref->ja_state & MKDIR_BODY) + mkdir = jaddref->ja_mkdir; + else + panic("handle_written_jaddref: Unknown dependency %p", + jaddref->ja_diradd); + jaddref->ja_diradd = NULL; /* also clears ja_mkdir */ + /* + * The mkdir may be waiting on the jaddref to clear before freeing. + */ + if (mkdir) { + KASSERT(mkdir->md_list.wk_type == D_MKDIR, + ("handle_written_jaddref: Incorrect type for mkdir %s", + TYPENAME(mkdir->md_list.wk_type))); + mkdir->md_jaddref = NULL; + diradd = mkdir->md_diradd; + mkdir->md_state |= DEPCOMPLETE; + complete_mkdir(mkdir); + } + WORKLIST_INSERT(&diradd->da_jwork, &jsegdep->jd_list); + if (jaddref->ja_state & NEWBLOCK) { + LIST_REMOVE(jaddref, ja_inodeps); + jaddref->ja_state &= ~ONDEPLIST; + inodedep->id_state |= ONDEPLIST; + LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_inodedephd, + inodedep, id_deps); + } + free_jaddref(jaddref); +} + +/* + * Called once a jnewblk journal is written. The allocdirect or allocindir + * is placed in the bmsafemap to await notification of a written bitmap. 
+ */ +static void +handle_written_jnewblk(jnewblk, jsegdep) + struct jnewblk *jnewblk; + struct jsegdep *jsegdep; +{ + struct bmsafemap *bmsafemap; + struct newblk *newblk; + + newblk = jnewblk->jn_newblk; + jnewblk->jn_newblk = NULL; + if (newblk == NULL) + panic("handle_written_jnewblk: No dependency for the segdep."); + + newblk->nb_jnewblk = NULL; + bmsafemap = newblk->nb_bmsafemap; + WORKLIST_INSERT(&newblk->nb_jwork, &jsegdep->jd_list); + newblk->nb_state |= ONDEPLIST; + LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + free_jnewblk(jnewblk); +} + +static void +cancel_jfreefrag(jfreefrag) + struct jfreefrag *jfreefrag; +{ + struct freefrag *freefrag; + + freefrag = jfreefrag->fr_freefrag; + jfreefrag->fr_freefrag = NULL; + freefrag->ff_jfreefrag = NULL; + free_jfreefrag(jfreefrag); + freefrag->ff_state |= DEPCOMPLETE; +} + +/* + * Free a jfreefrag when the parent freefrag is rendered obsolete. + */ +static void +free_jfreefrag(jfreefrag) + struct jfreefrag *jfreefrag; +{ + + if (jfreefrag->fr_state & IOSTARTED) + WORKLIST_REMOVE(&jfreefrag->fr_list); + else if (jfreefrag->fr_state & ONWORKLIST) + remove_from_journal(&jfreefrag->fr_list); + if (jfreefrag->fr_freefrag != NULL) + panic("free_jfreefrag: Still attached to a freefrag."); + WORKITEM_FREE(jfreefrag, D_JFREEFRAG); +} + +/* + * Called when the journal write for a jfreefrag completes. The parent + * freefrag is added to the worklist if this completes its dependencies. 
+ */ +static void +handle_written_jfreefrag(jfreefrag, jsegdep) + struct jfreefrag *jfreefrag; + struct jsegdep *jsegdep; +{ + struct freefrag *freefrag; + + freefrag = jfreefrag->fr_freefrag; + if (freefrag == NULL) + panic("handle_written_jfreefrag: No freefrag."); + freefrag->ff_state |= DEPCOMPLETE; + freefrag->ff_jfreefrag = NULL; + WORKLIST_INSERT(&freefrag->ff_jwork, &jsegdep->jd_list); + if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(&freefrag->ff_list); + jfreefrag->fr_freefrag = NULL; + free_jfreefrag(jfreefrag); +} + +/* + * Called when the journal write for a jfreeblk completes. The jfreeblk + * is removed from the freeblks list of pending journal writes and the + * jsegdep is moved to the freeblks jwork to be completed when all blocks + * have been reclaimed. + */ +static void +handle_written_jfreeblk(jfreeblk, jsegdep) + struct jfreeblk *jfreeblk; + struct jsegdep *jsegdep; +{ + struct freeblks *freeblks; + + freeblks = jfreeblk->jf_freeblks; + LIST_REMOVE(jfreeblk, jf_deps); + WORKLIST_INSERT(&freeblks->fb_jwork, &jsegdep->jd_list); + + /* + * If the freeblks is all journaled, we can add it to the worklist. + */ + if (LIST_EMPTY(&freeblks->fb_jfreeblkhd) && + (freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) { + /* Remove from the b_dep that is waiting on this write. */ + if (freeblks->fb_state & ONWORKLIST) + WORKLIST_REMOVE(&freeblks->fb_list); + add_to_worklist(&freeblks->fb_list); + } + + free_jfreeblk(jfreeblk); +} + +/* + * Allocate a new jremref that tracks the removal of ip from dp with the + * directory entry offset of diroff. Mark the entry as ATTACHED and + * DEPCOMPLETE as we have all the information required for the journal write + * and the directory has already been removed from the buffer. The caller + * is responsible for linking the jremref into the pagedep and adding it + * to the journal to write. 
The MKDIR_PARENT flag is set if we're doing + * a DOTDOT addition so handle_workitem_remove() can properly assign + * the jsegdep when we're done. + */ +static struct jremref * +newjremref(dirrem, dp, ip, diroff) + struct dirrem *dirrem; + struct inode *dp; + struct inode *ip; + off_t diroff; +{ + struct jremref *jremref; + + jremref = malloc(sizeof(*jremref), M_JREMREF, M_SOFTDEP_FLAGS); + workitem_alloc(&jremref->jr_list, D_JREMREF, UFSTOVFS(dp->i_ump)); + jremref->jr_state = ATTACHED | DEPCOMPLETE; + jremref->jr_dirrem = dirrem; + jremref->jr_diroff = diroff; + jremref->jr_ino = ip->i_number; + jremref->jr_parent = dp->i_number; + jremref->jr_mode = ip->i_mode; + jremref->jr_nlink = ip->i_nlink; + + return (jremref); +} + +/* + * Allocate a new jaddref to track the addition of ino to dp at diroff. The + * directory offset may not be known until later. The caller is responsible + * for adding the entry to the journal when this information is available. nlink + * should be the link count prior to the addition and mode is only required + * to have the correct FMT. + */ +static struct jaddref * +newjaddref(dp, ino, diroff, nlink, mode) + struct inode *dp; + ino_t ino; + off_t diroff; + int16_t nlink; + uint16_t mode; +{ + struct jaddref *jaddref; + + jaddref = malloc(sizeof(*jaddref), M_JADDREF, M_SOFTDEP_FLAGS); + workitem_alloc(&jaddref->ja_list, D_JADDREF, UFSTOVFS(dp->i_ump)); + jaddref->ja_mkdir = NULL; + jaddref->ja_state = ATTACHED | DEPCOMPLETE | ONDEPLIST; + jaddref->ja_diroff = diroff; + jaddref->ja_ino = ino; + jaddref->ja_parent = dp->i_number; + jaddref->ja_mode = mode; + jaddref->ja_nlink = nlink; + + return (jaddref); +} + +/* + * Create a new free dependency for a freework. The caller is responsible + * for adjusting the reference count when it has the lock held. The freedep + * will track an outstanding bitmap write that will ultimately clear the + * freework to continue. 
+ */ +static struct freedep * +newfreedep(struct freework *freework) +{ + struct freedep *freedep; + + freedep = malloc(sizeof(*freedep), M_FREEDEP, M_SOFTDEP_FLAGS); + workitem_alloc(&freedep->fd_list, D_FREEDEP, freework->fw_list.wk_mp); + freedep->fd_freework = freework; + + return (freedep); +} + +/* + * Free a freedep structure once the buffer it is linked to is written. If + * this is the last reference to the freework schedule it for completion. + */ +static void +free_freedep(freedep) + struct freedep *freedep; +{ + + if (--freedep->fd_freework->fw_ref == 0) + add_to_worklist(&freedep->fd_freework->fw_list); + WORKITEM_FREE(freedep, D_FREEDEP); +} + +/* + * Allocate a new freework structure that may be a level in an indirect + * when parent is not NULL or a top level block when it is. The top level + * freework structures are allocated without lk held and before the freeblks + * is visible outside of softdep_setup_freeblocks(). + */ +static struct freework * +newfreework(freeblks, parent, lbn, nb, frags, journal) + struct freeblks *freeblks; + struct freework *parent; + ufs_lbn_t lbn; + ufs2_daddr_t nb; + int frags; + int journal; +{ + struct freework *freework; + + freework = malloc(sizeof(*freework), M_FREEWORK, M_SOFTDEP_FLAGS); + workitem_alloc(&freework->fw_list, D_FREEWORK, freeblks->fb_list.wk_mp); + freework->fw_freeblks = freeblks; + freework->fw_parent = parent; + freework->fw_lbn = lbn; + freework->fw_blkno = nb; + freework->fw_frags = frags; + freework->fw_ref = 0; + freework->fw_off = 0; + LIST_INIT(&freework->fw_jwork); + + if (parent == NULL) { + WORKLIST_INSERT_UNLOCKED(&freeblks->fb_freeworkhd, + &freework->fw_list); + freeblks->fb_ref++; + } + if (journal) + newjfreeblk(freeblks, lbn, nb, frags); + + return (freework); +} + +/* + * Allocate a new jfreeblk to journal top level block pointer when truncating + * a file. The caller must add this to the worklist when lk is held. 
+ */ +static struct jfreeblk * +newjfreeblk(freeblks, lbn, blkno, frags) + struct freeblks *freeblks; + ufs_lbn_t lbn; + ufs2_daddr_t blkno; + int frags; +{ + struct jfreeblk *jfreeblk; + + jfreeblk = malloc(sizeof(*jfreeblk), M_JFREEBLK, M_SOFTDEP_FLAGS); + workitem_alloc(&jfreeblk->jf_list, D_JFREEBLK, freeblks->fb_list.wk_mp); + jfreeblk->jf_state = ATTACHED | DEPCOMPLETE; + jfreeblk->jf_ino = freeblks->fb_previousinum; + jfreeblk->jf_lbn = lbn; + jfreeblk->jf_blkno = blkno; + jfreeblk->jf_frags = frags; + jfreeblk->jf_freeblks = freeblks; + LIST_INSERT_HEAD(&freeblks->fb_jfreeblkhd, jfreeblk, jf_deps); + + return (jfreeblk); +} + +/* + * Cancel a jaddref either before it has been written or while it is being + * written. This happens when a link is removed before the add reaches + * the disk. The jaddref dependency is kept linked into the bmsafemap + * and inode to prevent the link count or bitmap from reaching the disk + * until handle_workitem_remove() re-adjusts the counts and bitmaps as + * required. + */ +static void +cancel_jaddref(jaddref, inodedep, wkhd) + struct jaddref *jaddref; + struct inodedep *inodedep; + struct workhead *wkhd; +{ + struct jaddref *jaddrefn; + + /* + * If we're canceling a new bitmap we have to search for another ref + * to move into the bmsafemap dep. This might be better expressed + * with another structure. 
+ */ + if (jaddref->ja_state & NEWBLOCK && inodedep != NULL) { + LIST_FOREACH(jaddrefn, &inodedep->id_jaddrefhd, ja_inodeps) { + if (jaddrefn == jaddref) + continue; + if ((jaddrefn->ja_state & GOINGAWAY) != 0) + continue; + jaddrefn->ja_state &= ~(ATTACHED | UNDONE); + jaddrefn->ja_state |= + jaddref->ja_state & (ATTACHED | UNDONE | NEWBLOCK); + jaddref->ja_state &= ~(ATTACHED | UNDONE | NEWBLOCK); + jaddref->ja_state |= ATTACHED; + LIST_REMOVE(jaddref, ja_bmdeps); + LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_jaddrefhd, + jaddrefn, ja_bmdeps); + } + } + if (jaddref->ja_state & IOWAITING) { + jaddref->ja_state &= ~IOWAITING; + wakeup(&jaddref->ja_list); + } + jaddref->ja_state |= GOINGAWAY; + jaddref->ja_mkdir = NULL; + if (jaddref->ja_state & IOSTARTED) { + jaddref->ja_state &= ~IOSTARTED; + WORKLIST_REMOVE(&jaddref->ja_list); + } else + remove_from_journal(&jaddref->ja_list); + if (wkhd == NULL) + return; + /* + * Leave the head of the list for jsegdeps for fast merging. + */ + if (LIST_FIRST(wkhd) != NULL) { + jaddref->ja_state |= ONWORKLIST; + LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jaddref->ja_list, wk_list); + } else + WORKLIST_INSERT(wkhd, &jaddref->ja_list); +} + +/* + * Attempt to free a jaddref structure when some work completes. This + * should only succeed once the entry is written and all dependencies have + * been notified. + */ +static void +free_jaddref(jaddref) + struct jaddref *jaddref; +{ + + if ((jaddref->ja_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + if (jaddref->ja_state & NEWBLOCK) + LIST_REMOVE(jaddref, ja_bmdeps); + if (jaddref->ja_state & ONDEPLIST) + LIST_REMOVE(jaddref, ja_inodeps); + if (jaddref->ja_state & (IOSTARTED | ONWORKLIST)) + panic("free_jaddref: Bad state %p(0x%X)", + jaddref, jaddref->ja_state); + if (jaddref->ja_mkdir != NULL) + panic("free_jaddref: Work pending, 0x%X\n", jaddref->ja_state); + WORKITEM_FREE(jaddref, D_JADDREF); +} + +/* + * Free a jremref structure once all dependencies are complete. 
+ */ +static void +free_jremref(jremref) + struct jremref *jremref; +{ + + if ((jremref->jr_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + /* If we were never written remove ourselves from the worklist. */ + if (jremref->jr_state & IOSTARTED) + panic("free_jremref: IO still pending"); + WORKITEM_FREE(jremref, D_JREMREF); +} + +/* + * Free a jnewblk structure. + */ +static void +free_jnewblk(jnewblk) + struct jnewblk *jnewblk; +{ + + if ((jnewblk->jn_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + LIST_REMOVE(jnewblk, jn_deps); + if (jnewblk->jn_newblk != NULL) + panic("free_jnewblk: Dependency still attached."); + WORKITEM_FREE(jnewblk, D_JNEWBLK); +} + +/* + * Cancel a jnewblk which has been superseded by a freeblk. The jnewblk + * is kept linked into the bmsafemap until the free completes, thus + * preventing the modified state from ever reaching disk. The free + * routine must pass this structure via ffs_blkfree() to + * softdep_setup_freeblks() so there is no race in releasing the space. + */ +static void +cancel_jnewblk(jnewblk, wkhd) + struct jnewblk *jnewblk; + struct workhead *wkhd; +{ + + if (jnewblk->jn_state & IOWAITING) { + jnewblk->jn_state &= ~IOWAITING; + wakeup(&jnewblk->jn_list); + } + jnewblk->jn_newblk = NULL; + jnewblk->jn_state |= GOINGAWAY; + if (jnewblk->jn_state & IOSTARTED) { + jnewblk->jn_state &= ~IOSTARTED; + WORKLIST_REMOVE(&jnewblk->jn_list); + } else + remove_from_journal(&jnewblk->jn_list); + /* + * Leave the head of the list for jsegdeps for fast merging. + */ + if (LIST_FIRST(wkhd) != NULL) { + jnewblk->jn_state |= ONWORKLIST; + LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jnewblk->jn_list, wk_list); + } else + WORKLIST_INSERT(wkhd, &jnewblk->jn_list); +} + +static void +free_jfreeblk(jfreeblk) + struct jfreeblk *jfreeblk; +{ + + WORKITEM_FREE(jfreeblk, D_JFREEBLK); +} + +/* + * Release one reference to a jseg and free it if the count reaches 0. This + * should eventually reclaim journal space as well. 
+ */ +static void +free_jseg(jseg) + struct jseg *jseg; +{ + struct jblocks *jblocks; + struct jsegdep *jsegdep; + + + KASSERT(jseg->js_refs > 0, + ("free_jseg: Invalid refcnt %d", jseg->js_refs)); + if (--jseg->js_refs != 0) + return; +#ifdef DEBUG + { + int errors; + int i; + errors = 0; + for (i = 0; i < jseg->js_cnt; i++) { + jsegdep = &jseg->js_deps[i]; + if (jsegdep->jd_state != 0) { + printf("free_jseg: %p(0x%X) allocated " + "to %s:%d\n", jsegdep, jsegdep->jd_state, + TYPENAME(jsegdep->jd_type), + jsegdep->jd_line); + errors = 1; + } + } + if (errors) + panic("errors"); + } +#endif + jblocks = jseg->js_jblocks; + /* + * Free only those jsegs which have none allocated before them to + * preserve the journal space ordering. + */ + while ((jseg = TAILQ_FIRST(&jblocks->jb_segs)) != NULL) { + jblocks->jb_oldestseq = jseg->js_seq; + if (jseg->js_refs != 0) + break; + TAILQ_REMOVE(&jblocks->jb_segs, jseg, js_next); + jblocks_free(jblocks, jseg->js_size); + KASSERT(LIST_EMPTY(&jseg->js_entries), + ("free_jseg: Freed jseg has valid entries.")); + WORKITEM_FREE(jseg, D_JSEG); + } +} + +/* + * Release a jsegdep and decrement the jseg count. + */ +static void +free_jsegdep(jsegdep) + struct jsegdep *jsegdep; +{ + +#ifdef DEBUG + if (jsegdep->jd_state != ATTACHED) + panic("free_jsegdep: Illegal state 0x%X.", jsegdep->jd_state); + jsegdep->jd_state &= ~ATTACHED; +#endif + free_jseg(jsegdep->jd_seg); +} + +/* + * Wait for a journal item to make it to disk. Initiate journal processing + * if required. + */ +static void +jwait(wk) + struct worklist *wk; +{ + + /* + * If IO has not started we process the journal. We can't mark the + * worklist item as IOWAITING because we drop the lock while + * processing the journal and the worklist entry may be freed after + * this point. The caller may call back in and re-issue the request. 
+ */ + if ((wk->wk_state & IOSTARTED) == 0) { + softdep_process_journal(wk->wk_mp, MNT_WAIT); + return; + } + wk->wk_state |= IOWAITING; + msleep(wk, &lk, PRIBIO, "jwait", 0); +} + +/* + * Lookup an inodedep based on an inode pointer and set the nlinkdelta as + * appropriate. This is a convenience function to reduce duplicate code + * for the setup and revert functions below. + */ +static struct inodedep * +inodedep_lookup_ip(ip) + struct inode *ip; +{ + struct inodedep *inodedep; + + KASSERT(ip->i_nlink >= ip->i_effnlink, + ("inodedep_lookup_ip: bad delta")); + (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, + DEPALLOC, &inodedep); + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; + + return (inodedep); +} + +/* + * Called prior to creating a new inode and linking it to a directory. The + * jaddref structure must already be allocated by softdep_setup_inomapdep + * and it is discovered here so we can initialize the mode and update + * nlinkdelta. + */ +void +softdep_setup_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + + KASSERT(ip->i_nlink == 1, + ("softdep_setup_create: Invalid link count.")); + + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + jaddref = LIST_FIRST(&inodedep->id_jaddrefhd); + KASSERT(jaddref != NULL && + jaddref->ja_parent == dp->i_number && jaddref->ja_nlink == 0, + ("softdep_setup_create: No addref structure present.")); + jaddref->ja_mode = ip->i_mode; + FREE_LOCK(&lk); +} + +/* + * Create a jaddref structure to track the addition of a DOTDOT link when + * we are reparenting an inode as part of a rename. This jaddref will be + * found by softdep_setup_directory_change. + */ +void +softdep_setup_dotdot_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + + /* + * We don't set MKDIR_PARENT as this is not tied to a mkdir and + * is used as a normal link would be. 
+ */ + jaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET, dp->i_nlink - 1, + dp->i_mode); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(dp); + LIST_INSERT_HEAD(&inodedep->id_jaddrefhd, jaddref, ja_inodeps); + FREE_LOCK(&lk); +} + +/* + * Create a jaddref structure to track a new link to an inode. The directory + * offset is not known until softdep_setup_directory_add or + * softdep_setup_directory_change. + */ +void +softdep_setup_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + + jaddref = newjaddref(dp, ip->i_number, 0, ip->i_nlink - 1, ip->i_mode); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + LIST_INSERT_HEAD(&inodedep->id_jaddrefhd, jaddref, ja_inodeps); + FREE_LOCK(&lk); +} + +/* + * Called to create the jaddref structures to track . and .. references as + * well as lookup and further initialize the incomplete jaddref created + * by softdep_setup_inomapdep when the inode was allocated. + */ +void +softdep_setup_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *dotdotaddref; + struct jaddref *dotaddref; + struct jaddref *jaddref; + + dotaddref = newjaddref(ip, ip->i_number, DOT_OFFSET, 1, ip->i_mode); + dotaddref->ja_state |= MKDIR_BODY; + dotdotaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET, + dp->i_nlink - 1, dp->i_mode); + dotdotaddref->ja_state |= MKDIR_PARENT; + + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + jaddref = LIST_FIRST(&inodedep->id_jaddrefhd); + KASSERT(jaddref != NULL, + ("softdep_setup_mkdir: No addref structure present.")); + KASSERT(jaddref->ja_parent == dp->i_number && jaddref->ja_nlink == 0, + ("softdep_setup_mkdir: bad parent/link %d/%d", + jaddref->ja_parent, jaddref->ja_nlink)); + jaddref->ja_mode = ip->i_mode; + LIST_INSERT_AFTER(jaddref, dotaddref, ja_inodeps); + inodedep = inodedep_lookup_ip(dp); + LIST_INSERT_HEAD(&inodedep->id_jaddrefhd, dotdotaddref, ja_inodeps); + 
FREE_LOCK(&lk); +} + +/* + * Called to track nlinkdelta of the inode and parent directories prior to + * unlinking a directory. + */ +void +softdep_setup_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + ACQUIRE_LOCK(&lk); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + FREE_LOCK(&lk); +} + +/* + * Called to track nlinkdelta of the inode and parent directories prior to + * unlink. + */ +void +softdep_setup_unlink(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + ACQUIRE_LOCK(&lk); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed non-directory + * creation. + */ +void +softdep_revert_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + jaddref = LIST_FIRST(&inodedep->id_jaddrefhd); + jaddref->ja_state |= COMPLETE | DEPCOMPLETE; + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_create: addref parent mismatch")); + free_jaddref(jaddref); + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed dotdot link + * creation. + */ +void +softdep_revert_dotdot_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(dp); + jaddref = LIST_FIRST(&inodedep->id_jaddrefhd); + jaddref->ja_state |= COMPLETE | DEPCOMPLETE; + KASSERT(jaddref->ja_parent == ip->i_number, + ("softdep_revert_dotdot_link: addref parent mismatch")); + free_jaddref(jaddref); + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed link + * addition. 
+ */ +void +softdep_revert_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + jaddref = LIST_FIRST(&inodedep->id_jaddrefhd); + jaddref->ja_state |= COMPLETE | DEPCOMPLETE; + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_link: addref parent mismatch")); + free_jaddref(jaddref); + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed mkdir + * attempt. + */ +void +softdep_revert_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(dp); + jaddref = LIST_FIRST(&inodedep->id_jaddrefhd); + jaddref->ja_state |= COMPLETE | DEPCOMPLETE; + KASSERT(jaddref->ja_parent == ip->i_number, + ("softdep_revert_mkdir: dotdot addref parent mismatch")); + free_jaddref(jaddref); + inodedep = inodedep_lookup_ip(ip); + jaddref = LIST_FIRST(&inodedep->id_jaddrefhd); + jaddref->ja_state |= COMPLETE | DEPCOMPLETE; + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_mkdir: addref parent mismatch")); + free_jaddref(jaddref); + jaddref = LIST_FIRST(&inodedep->id_jaddrefhd); + jaddref->ja_state |= COMPLETE | DEPCOMPLETE; + KASSERT(jaddref->ja_parent == ip->i_number, + ("softdep_revert_mkdir: dot addref parent mismatch")); + free_jaddref(jaddref); + FREE_LOCK(&lk); +} + +/* + * Called to correct nlinkdelta after a failed rmdir. + */ +void +softdep_revert_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + ACQUIRE_LOCK(&lk); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + FREE_LOCK(&lk); +} + +/* * Protecting the freemaps (or bitmaps). 
* * To eliminate the need to execute fsck before mounting a filesystem @@ -1536,8 +3273,16 @@ softdep_setup_inomapdep(bp, ip, newinum) { struct inodedep *inodedep; struct bmsafemap *bmsafemap; + struct jaddref *jaddref; /* + * Allocate the journal reference add structure so that the bitmap + * can be dependent on it. + */ + jaddref = newjaddref(ip, newinum, 0, 0, 0); + jaddref->ja_state |= NEWBLOCK; + + /* * Create a dependency for the newly allocated inode. * Panic if it already exists as something is seriously wrong. * Otherwise add it to the dependency list for the buffer holding @@ -1546,12 +3291,14 @@ softdep_setup_inomapdep(bp, ip, newinum) ACQUIRE_LOCK(&lk); if ((inodedep_lookup(UFSTOVFS(ip->i_ump), newinum, DEPALLOC|NODELAY, &inodedep))) - panic("softdep_setup_inomapdep: dependency for new inode " - "already exists"); - inodedep->id_buf = bp; + panic("softdep_setup_inomapdep: dependency %p for new" + "inode already exists", inodedep); + bmsafemap = bmsafemap_lookup(UFSTOVFS(ip->i_ump), bp, + ino_to_cg(ip->i_ump->um_fs, newinum)); + LIST_INSERT_HEAD(&bmsafemap->sm_jaddrefhd, jaddref, ja_bmdeps); + inodedep->id_bmsafemap = bmsafemap; inodedep->id_state &= ~DEPCOMPLETE; - bmsafemap = bmsafemap_lookup(inodedep->id_list.wk_mp, bp); - LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps); + LIST_INSERT_HEAD(&inodedep->id_jaddrefhd, jaddref, ja_inodeps); FREE_LOCK(&lk); } @@ -1560,13 +3307,16 @@ softdep_setup_inomapdep(bp, ip, newinum) * allocate block or fragment. */ void -softdep_setup_blkmapdep(bp, mp, newblkno) +softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags) struct buf *bp; /* buffer for cylgroup block with block map */ struct mount *mp; /* filesystem doing allocation */ ufs2_daddr_t newblkno; /* number of newly allocated block */ + int frags; /* Number of fragments. */ + int oldfrags; /* Previous number of fragments for extend. 
*/ { struct newblk *newblk; struct bmsafemap *bmsafemap; + struct jnewblk *jnewblk; struct fs *fs; fs = VFSTOUFS(mp)->um_fs; @@ -1575,14 +3325,70 @@ void * Add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. */ + jnewblk = malloc(sizeof(*jnewblk), M_JNEWBLK, M_SOFTDEP_FLAGS); + workitem_alloc(&jnewblk->jn_list, D_JNEWBLK, mp); + jnewblk->jn_state = ATTACHED; + jnewblk->jn_blkno = newblkno; + jnewblk->jn_frags = frags; + jnewblk->jn_oldfrags = oldfrags; +#ifdef DEBUG + { + struct cg *cgp; + uint8_t *blksfree; + long bno; + int i; + + cgp = (struct cg *)bp->b_data; + blksfree = cg_blksfree(cgp); + bno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) { + if (isset(blksfree, bno + i)) + panic("softdep_setup_blkmapdep: " + "free fragment %d from %d-%d " + "state 0x%X dep %p", i, + jnewblk->jn_oldfrags, + jnewblk->jn_frags, + jnewblk->jn_state, + jnewblk->jn_newblk); + } + } +#endif ACQUIRE_LOCK(&lk); - if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0) + if (newblk_lookup(mp, newblkno, DEPALLOC, &newblk) != 0) panic("softdep_setup_blkmapdep: found block"); - newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp); - LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp, + dtog(fs, newblkno)); + jnewblk->jn_newblk = newblk; + newblk->nb_bmsafemap = bmsafemap; + newblk->nb_jnewblk = jnewblk; + LIST_INSERT_HEAD(&bmsafemap->sm_jnewblkhd, jnewblk, jn_deps); FREE_LOCK(&lk); } +#define BMSAFEMAP_HASH(fs, cg) \ + (&bmsafemap_hashtbl[((((register_t)(fs)) >> 13) + (cg)) & bmsafemap_hash]) + +static int +bmsafemap_find(bmsafemaphd, mp, cg, bmsafemapp) + struct bmsafemap_hashhead *bmsafemaphd; + struct mount *mp; + int cg; + struct bmsafemap **bmsafemapp; +{ + struct bmsafemap *bmsafemap; + + LIST_FOREACH(bmsafemap, bmsafemaphd, sm_hash) + if (bmsafemap->sm_list.wk_mp == mp && bmsafemap->sm_cg == cg) + break; 
+ if (bmsafemap) { + *bmsafemapp = bmsafemap; + return (1); + } + *bmsafemapp = NULL; + + return (0); +} + /* * Find the bmsafemap associated with a cylinder group buffer. * If none exists, create one. The buffer must be locked when @@ -1590,27 +3396,43 @@ void * splbio interrupts blocked. */ static struct bmsafemap * -bmsafemap_lookup(mp, bp) +bmsafemap_lookup(mp, bp, cg) struct mount *mp; struct buf *bp; + int cg; { - struct bmsafemap *bmsafemap; + struct bmsafemap_hashhead *bmsafemaphd; + struct bmsafemap *bmsafemap, *collision; struct worklist *wk; + struct fs *fs; mtx_assert(&lk, MA_OWNED); - LIST_FOREACH(wk, &bp->b_dep, wk_list) - if (wk->wk_type == D_BMSAFEMAP) - return (WK_BMSAFEMAP(wk)); + if (bp) + LIST_FOREACH(wk, &bp->b_dep, wk_list) + if (wk->wk_type == D_BMSAFEMAP) + return (WK_BMSAFEMAP(wk)); + fs = VFSTOUFS(mp)->um_fs; + bmsafemaphd = BMSAFEMAP_HASH(fs, cg); + if (bmsafemap_find(bmsafemaphd, mp, cg, &bmsafemap) == 1) + return (bmsafemap); FREE_LOCK(&lk); bmsafemap = malloc(sizeof(struct bmsafemap), M_BMSAFEMAP, M_SOFTDEP_FLAGS); workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp); bmsafemap->sm_buf = bp; - LIST_INIT(&bmsafemap->sm_allocdirecthd); - LIST_INIT(&bmsafemap->sm_allocindirhd); LIST_INIT(&bmsafemap->sm_inodedephd); + LIST_INIT(&bmsafemap->sm_inodedepwr); LIST_INIT(&bmsafemap->sm_newblkhd); + LIST_INIT(&bmsafemap->sm_newblkwr); + LIST_INIT(&bmsafemap->sm_jaddrefhd); + LIST_INIT(&bmsafemap->sm_jnewblkhd); ACQUIRE_LOCK(&lk); + if (bmsafemap_find(bmsafemaphd, mp, cg, &collision) == 1) { + WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + return (collision); + } + bmsafemap->sm_cg = cg; + LIST_INSERT_HEAD(bmsafemaphd, bmsafemap, sm_hash); WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); return (bmsafemap); } @@ -1645,8 +3467,10 @@ static struct bmsafemap * * unreferenced fragments. 
*/ void -softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) +softdep_setup_allocdirect(ip, off, lbn, newblkno, oldblkno, newsize, oldsize, + bp) struct inode *ip; /* inode to which block is being added */ + int off; /* Offset from start of di_db. */ ufs_lbn_t lbn; /* block pointer within inode */ ufs2_daddr_t newblkno; /* disk block number being added */ ufs2_daddr_t oldblkno; /* previous block number, 0 unless frag */ @@ -1656,30 +3480,21 @@ void { struct allocdirect *adp, *oldadp; struct allocdirectlst *adphead; - struct bmsafemap *bmsafemap; + struct freefrag *freefrag; struct inodedep *inodedep; struct pagedep *pagedep; + struct jnewblk *jnewblk; struct newblk *newblk; struct mount *mp; mp = UFSTOVFS(ip->i_ump); - adp = malloc(sizeof(struct allocdirect), - M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO); - workitem_alloc(&adp->ad_list, D_ALLOCDIRECT, mp); - adp->ad_lbn = lbn; - adp->ad_newblkno = newblkno; - adp->ad_oldblkno = oldblkno; - adp->ad_newsize = newsize; - adp->ad_oldsize = oldsize; - adp->ad_state = ATTACHED; - LIST_INIT(&adp->ad_newdirblk); - if (newblkno == oldblkno) - adp->ad_freefrag = NULL; + if (oldblkno && oldblkno != newblkno) + freefrag = newfreefrag(ip, oldblkno, oldsize, lbn); else - adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize); + freefrag = NULL; ACQUIRE_LOCK(&lk); - if (lbn >= NDADDR) { + if (off >= NDADDR) { /* allocating an indirect block */ if (oldblkno != 0) panic("softdep_setup_allocdirect: non-zero indir"); @@ -1692,26 +3507,38 @@ void * deletions. 
*/ if ((ip->i_mode & IFMT) == IFDIR && - pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) + pagedep_lookup(mp, ip->i_number, off, DEPALLOC, + &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); } - if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0) + if (newblk_lookup(mp, newblkno, 0, &newblk) == 0) panic("softdep_setup_allocdirect: lost block"); - if (newblk->nb_state == DEPCOMPLETE) { - adp->ad_state |= DEPCOMPLETE; - adp->ad_buf = NULL; - } else { - bmsafemap = newblk->nb_bmsafemap; - adp->ad_buf = bmsafemap->sm_buf; - LIST_REMOVE(newblk, nb_deps); - LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps); + /* + * Convert the newblk to an allocdirect. + */ + newblk->nb_list.wk_type = D_ALLOCDIRECT; + newblk->nb_freefrag = freefrag; + adp = (struct allocdirect *)newblk; + adp->ad_offset = off; + adp->ad_oldblkno = oldblkno; + adp->ad_newsize = newsize; + adp->ad_oldsize = oldsize; + + /* + * Finish initializing the journal. + */ + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = lbn; + jnewblk->jn_state |= DEPCOMPLETE; + add_to_journal(&jnewblk->jn_list); } - LIST_REMOVE(newblk, nb_hash); - free(newblk, M_NEWBLK); - + if (freefrag && freefrag->ff_jfreefrag != NULL) + add_to_journal(&freefrag->ff_jfreefrag->fr_list); inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep); adp->ad_inodedep = inodedep; - WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); + + WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list); /* * The list of allocdirects must be kept in sorted and ascending * order so that the rollback routines can quickly determine the @@ -1726,24 +3553,25 @@ void */ adphead = &inodedep->id_newinoupdt; oldadp = TAILQ_LAST(adphead, allocdirectlst); - if (oldadp == NULL || oldadp->ad_lbn <= lbn) { + if (oldadp == NULL || oldadp->ad_offset <= off) { /* insert at end of list */ TAILQ_INSERT_TAIL(adphead, adp, ad_next); - if (oldadp != NULL && oldadp->ad_lbn == lbn) + if (oldadp != NULL && 
oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); return; } TAILQ_FOREACH(oldadp, adphead, ad_next) { - if (oldadp->ad_lbn >= lbn) + if (oldadp->ad_offset >= off) break; } if (oldadp == NULL) panic("softdep_setup_allocdirect: lost entry"); /* insert in middle of list */ TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); - if (oldadp->ad_lbn == lbn) + if (oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); + FREE_LOCK(&lk); } @@ -1761,10 +3589,11 @@ allocdirect_merge(adphead, newadp, oldadp) struct freefrag *freefrag; struct newdirblk *newdirblk; + freefrag = NULL; mtx_assert(&lk, MA_OWNED); if (newadp->ad_oldblkno != oldadp->ad_newblkno || newadp->ad_oldsize != oldadp->ad_newsize || - newadp->ad_lbn >= NDADDR) + newadp->ad_offset >= NDADDR) panic("%s %jd != new %jd || old size %ld != new %ld", "allocdirect_merge: old blkno", (intmax_t)newadp->ad_oldblkno, @@ -1779,7 +3608,7 @@ allocdirect_merge(adphead, newadp, oldadp) * This action is done by swapping the freefrag dependencies. * The new dependency gains the old one's freefrag, and the * old one gets the new one and then immediately puts it on - * the worklist when it is freed by free_allocdirect. It is + * the worklist when it is freed by free_newblk. It is * not possible to do this swap when the old dependency had a * non-zero size but no previous fragment to free. This condition * arises when the new block is an extension of the old block. @@ -1788,8 +3617,8 @@ allocdirect_merge(adphead, newadp, oldadp) * the old dependency, so cannot legitimately be freed until the * conditions for the new dependency are fulfilled. 
*/ + freefrag = newadp->ad_freefrag; if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) { - freefrag = newadp->ad_freefrag; newadp->ad_freefrag = oldadp->ad_freefrag; oldadp->ad_freefrag = freefrag; } @@ -1804,32 +3633,94 @@ allocdirect_merge(adphead, newadp, oldadp) panic("allocdirect_merge: extra newdirblk"); WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list); } - free_allocdirect(adphead, oldadp, 0); + TAILQ_REMOVE(adphead, oldadp, ad_next); + /* + * We need to move any journal dependencies over to the freefrag + * that releases this block if it exists. Otherwise we are + * extending an existing block and we'll wait until that is + * complete to release the journal space and extend the + * new journal to cover this old space as well. + */ + if (freefrag == NULL) { + struct jnewblk *jnewblk; + struct jnewblk *njnewblk; + + if (oldadp->ad_newblkno != newadp->ad_newblkno) + panic("allocdirect_merge: %jd != %jd", + oldadp->ad_newblkno, newadp->ad_newblkno); + jnewblk = oldadp->ad_block.nb_jnewblk; + cancel_newblk(&oldadp->ad_block, &newadp->ad_block.nb_jwork, + D_ALLOCDIRECT); + /* + * We have an unwritten jnewblk, we need to merge the + * frag bits with our own. The newer adp's journal can not + * be written prior to the old one so no need to check for + * it here. + */ + if (jnewblk) { + njnewblk = newadp->ad_block.nb_jnewblk; + if (jnewblk->jn_state & UNDONE) { + njnewblk->jn_state |= UNDONE | NEWBLOCK; + njnewblk->jn_state &= ~ATTACHED; + jnewblk->jn_state &= ~UNDONE; + } + njnewblk->jn_oldfrags = jnewblk->jn_oldfrags; + WORKLIST_REMOVE(&jnewblk->jn_list); + jnewblk->jn_state |= ATTACHED | COMPLETE; + free_jnewblk(jnewblk); + } + } else { + /* + * We can skip journaling for this freefrag and just complete + * any pending journal work for the allocdirect that is being + * removed after the freefrag completes. 
+ */ + if (freefrag->ff_jfreefrag) + cancel_jfreefrag(freefrag->ff_jfreefrag); + cancel_newblk(&oldadp->ad_block, &freefrag->ff_jwork, + D_FREEFRAG); + } + free_newblk(&oldadp->ad_block); } /* * Allocate a new freefrag structure if needed. */ static struct freefrag * -newfreefrag(ip, blkno, size) +newfreefrag(ip, blkno, size, lbn) struct inode *ip; ufs2_daddr_t blkno; long size; + ufs_lbn_t lbn; { + struct jfreefrag *jfreefrag; struct freefrag *freefrag; struct fs *fs; - if (blkno == 0) - return (NULL); fs = ip->i_fs; if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag) panic("newfreefrag: frag size"); freefrag = malloc(sizeof(struct freefrag), - M_FREEFRAG, M_SOFTDEP_FLAGS); + M_FREEFRAG, M_SOFTDEP_FLAGS); workitem_alloc(&freefrag->ff_list, D_FREEFRAG, UFSTOVFS(ip->i_ump)); + freefrag->ff_state = ATTACHED; + LIST_INIT(&freefrag->ff_jwork); freefrag->ff_inum = ip->i_number; freefrag->ff_blkno = blkno; freefrag->ff_fragsize = size; + + jfreefrag = malloc(sizeof(struct jfreefrag), M_JFREEFRAG, + M_SOFTDEP_FLAGS); + workitem_alloc(&jfreefrag->fr_list, D_JFREEFRAG, UFSTOVFS(ip->i_ump)); + jfreefrag->fr_state = ATTACHED | DEPCOMPLETE; + jfreefrag->fr_ino = ip->i_number; + jfreefrag->fr_lbn = lbn; + jfreefrag->fr_blkno = blkno; + jfreefrag->fr_frags = numfrags(fs, size); + + freefrag->ff_jfreefrag = jfreefrag; + jfreefrag->fr_freefrag = freefrag; + return (freefrag); } @@ -1842,9 +3733,17 @@ handle_workitem_freefrag(freefrag) struct freefrag *freefrag; { struct ufsmount *ump = VFSTOUFS(freefrag->ff_list.wk_mp); + struct workhead wkhd; + /* + * It would be illegal to add new completion items to the + * freefrag after it was scheduled to be done so it must be + * safe to modify the list head here. 
+ */ + LIST_INIT(&wkhd); + LIST_SWAP(&freefrag->ff_jwork, &wkhd, worklist, wk_list); ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno, - freefrag->ff_fragsize, freefrag->ff_inum); + freefrag->ff_fragsize, freefrag->ff_inum, &wkhd); ACQUIRE_LOCK(&lk); WORKITEM_FREE(freefrag, D_FREEFRAG); FREE_LOCK(&lk); @@ -1867,50 +3766,52 @@ softdep_setup_allocext(ip, lbn, newblkno, oldblkno { struct allocdirect *adp, *oldadp; struct allocdirectlst *adphead; - struct bmsafemap *bmsafemap; + struct freefrag *freefrag; struct inodedep *inodedep; + struct jnewblk *jnewblk; struct newblk *newblk; struct mount *mp; + if (lbn >= NXADDR) + panic("softdep_setup_allocext: lbn %lld > NXADDR", + (long long)lbn); + mp = UFSTOVFS(ip->i_ump); - adp = malloc(sizeof(struct allocdirect), - M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO); - workitem_alloc(&adp->ad_list, D_ALLOCDIRECT, mp); - adp->ad_lbn = lbn; - adp->ad_newblkno = newblkno; - adp->ad_oldblkno = oldblkno; - adp->ad_newsize = newsize; - adp->ad_oldsize = oldsize; - adp->ad_state = ATTACHED | EXTDATA; - LIST_INIT(&adp->ad_newdirblk); - if (newblkno == oldblkno) - adp->ad_freefrag = NULL; + if (oldblkno && oldblkno != newblkno) + freefrag = newfreefrag(ip, oldblkno, oldsize, lbn); else - adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize); + freefrag = NULL; ACQUIRE_LOCK(&lk); - if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0) + if (newblk_lookup(mp, newblkno, 0, &newblk) == 0) panic("softdep_setup_allocext: lost block"); + /* + * Convert the newblk to an allocdirect. + */ + newblk->nb_list.wk_type = D_ALLOCDIRECT; + newblk->nb_freefrag = freefrag; + adp = (struct allocdirect *)newblk; + adp->ad_offset = lbn; + adp->ad_oldblkno = oldblkno; + adp->ad_newsize = newsize; + adp->ad_oldsize = oldsize; + adp->ad_state |= EXTDATA; + /* + * Finish initializing the journal. + */ + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = -1 - lbn; /* Negative lbns for ext. 
*/ + jnewblk->jn_state |= DEPCOMPLETE; + add_to_journal(&jnewblk->jn_list); + } + if (freefrag && freefrag->ff_jfreefrag != NULL) + add_to_journal(&freefrag->ff_jfreefrag->fr_list); inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep); adp->ad_inodedep = inodedep; - if (newblk->nb_state == DEPCOMPLETE) { - adp->ad_state |= DEPCOMPLETE; - adp->ad_buf = NULL; - } else { - bmsafemap = newblk->nb_bmsafemap; - adp->ad_buf = bmsafemap->sm_buf; - LIST_REMOVE(newblk, nb_deps); - LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps); - } - LIST_REMOVE(newblk, nb_hash); - free(newblk, M_NEWBLK); - - WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); - if (lbn >= NXADDR) - panic("softdep_setup_allocext: lbn %lld > NXADDR", - (long long)lbn); + WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list); /* * The list of allocdirects must be kept in sorted and ascending * order so that the rollback routines can quickly determine the @@ -1925,23 +3826,23 @@ softdep_setup_allocext(ip, lbn, newblkno, oldblkno */ adphead = &inodedep->id_newextupdt; oldadp = TAILQ_LAST(adphead, allocdirectlst); - if (oldadp == NULL || oldadp->ad_lbn <= lbn) { + if (oldadp == NULL || oldadp->ad_offset <= lbn) { /* insert at end of list */ TAILQ_INSERT_TAIL(adphead, adp, ad_next); - if (oldadp != NULL && oldadp->ad_lbn == lbn) + if (oldadp != NULL && oldadp->ad_offset == lbn) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); return; } TAILQ_FOREACH(oldadp, adphead, ad_next) { - if (oldadp->ad_lbn >= lbn) + if (oldadp->ad_offset >= lbn) break; } if (oldadp == NULL) panic("softdep_setup_allocext: lost entry"); /* insert in middle of list */ TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); - if (oldadp->ad_lbn == lbn) + if (oldadp->ad_offset == lbn) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); } @@ -1975,22 +3876,41 @@ softdep_setup_allocext(ip, lbn, newblkno, oldblkno * Allocate a new allocindir structure. 
*/ static struct allocindir * -newallocindir(ip, ptrno, newblkno, oldblkno) +newallocindir(ip, ptrno, newblkno, oldblkno, lbn) struct inode *ip; /* inode for file being extended */ int ptrno; /* offset of pointer in indirect block */ ufs2_daddr_t newblkno; /* disk block number being added */ ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ + ufs_lbn_t lbn; { + struct newblk *newblk; struct allocindir *aip; + struct freefrag *freefrag; + struct jnewblk *jnewblk; - aip = malloc(sizeof(struct allocindir), - M_ALLOCINDIR, M_SOFTDEP_FLAGS|M_ZERO); - workitem_alloc(&aip->ai_list, D_ALLOCINDIR, UFSTOVFS(ip->i_ump)); - aip->ai_state = ATTACHED; + if (oldblkno) + freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize, lbn); + else + freefrag = NULL; + ACQUIRE_LOCK(&lk); + if (newblk_lookup(UFSTOVFS(ip->i_ump), newblkno, 0, &newblk) == 0) + panic("new_allocindir: lost block"); + KASSERT(newblk->nb_list.wk_type == D_NEWBLK, + ("newallocindir: Unexpected newblk state %d", + newblk->nb_list.wk_type)); + newblk->nb_list.wk_type = D_ALLOCINDIR; + newblk->nb_freefrag = freefrag; + aip = (struct allocindir *)newblk; aip->ai_offset = ptrno; - aip->ai_newblkno = newblkno; aip->ai_oldblkno = oldblkno; - aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize); + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = lbn; + jnewblk->jn_state |= DEPCOMPLETE; + add_to_journal(&jnewblk->jn_list); + } + if (freefrag && freefrag->ff_jfreefrag != NULL) + add_to_journal(&freefrag->ff_jfreefrag->fr_list); return (aip); } @@ -1999,31 +3919,35 @@ static struct allocindir * * to a newly allocated file page. 
*/ void -softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) +softdep_setup_allocindir_page(ip, bp, indir, lvl, lbn, newblkno, oldblkno, nbp) struct inode *ip; /* inode for file being extended */ - ufs_lbn_t lbn; /* allocated block number within file */ struct buf *bp; /* buffer with indirect blk referencing page */ - int ptrno; /* offset of pointer in indirect block */ + struct indir *indir; /* Indirect block path. */ + int lvl; /* Indirect block level for parent. */ + ufs_lbn_t lbn; /* Logical block number of this block. */ ufs2_daddr_t newblkno; /* disk block number being added */ ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ struct buf *nbp; /* buffer holding allocated page */ { + struct inodedep *inodedep; struct allocindir *aip; struct pagedep *pagedep; + struct mount *mp; ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page"); - aip = newallocindir(ip, ptrno, newblkno, oldblkno); - ACQUIRE_LOCK(&lk); + mp = UFSTOVFS(ip->i_ump); + aip = newallocindir(ip, indir[lvl].in_off, newblkno, oldblkno, lbn); + (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); /* * If we are allocating a directory page, then we must * allocate an associated pagedep to track additions and * deletions. */ if ((ip->i_mode & IFMT) == IFDIR && - pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) + pagedep_lookup(mp, ip->i_number, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - setup_allocindir_phase2(bp, ip, aip); + WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); + setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); FREE_LOCK(&lk); } @@ -2032,45 +3956,90 @@ void * newly allocated indirect block. 
*/ void -softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) +softdep_setup_allocindir_meta(nbp, ip, bp, indir, lvl, newblkno) struct buf *nbp; /* newly allocated indirect block */ struct inode *ip; /* inode for file being extended */ struct buf *bp; /* indirect block referencing allocated block */ - int ptrno; /* offset of pointer in indirect block */ + struct indir *indir; /* Indirect block path. */ + int lvl; /* Indirect block level this block. */ ufs2_daddr_t newblkno; /* disk block number being added */ { + struct inodedep *inodedep; struct allocindir *aip; + ufs_lbn_t lbn; + lbn = indir[lvl].in_lbn; ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta"); - aip = newallocindir(ip, ptrno, newblkno, 0); + aip = newallocindir(ip, indir[lvl - 1].in_off, newblkno, 0, lbn); + (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC, + &inodedep); + WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); + setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); + FREE_LOCK(&lk); +} + +static void +handle_workitem_indirdep(indirdep) + struct indirdep *indirdep; +{ + struct buf *sbp; + + sbp = indirdep->ir_savebp; ACQUIRE_LOCK(&lk); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - setup_allocindir_phase2(bp, ip, aip); + free_indirdep(indirdep); FREE_LOCK(&lk); + sbp->b_flags |= B_INVAL | B_NOCACHE; + brelse(sbp); } +static void +indirdep_complete(indirdep) + struct indirdep *indirdep; +{ + struct allocindir *aip; + + LIST_REMOVE(indirdep, ir_next); + indirdep->ir_state &= ~ONDEPLIST; + + while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) { + LIST_REMOVE(aip, ai_next); + free_newblk(&aip->ai_block); + } + if ((indirdep->ir_state & ATTACHED) && + LIST_EMPTY(&indirdep->ir_deplisthd) && + LIST_EMPTY(&indirdep->ir_donehd) && + LIST_EMPTY(&indirdep->ir_writehd)) { + if (indirdep->ir_state & ONWORKLIST) + WORKLIST_REMOVE(&indirdep->ir_list); + add_to_worklist(&indirdep->ir_list); + } +} + /* * Called to finish the allocation of the "aip" allocated 
* by one of the two routines above. */ static void -setup_allocindir_phase2(bp, ip, aip) +setup_allocindir_phase2(bp, ip, inodedep, aip, lbn) struct buf *bp; /* in-memory copy of the indirect block */ struct inode *ip; /* inode for file being extended */ + struct inodedep *inodedep; /* Inodedep for ip */ struct allocindir *aip; /* allocindir allocated by the above routines */ + ufs_lbn_t lbn; /* Logical block number for this block. */ { struct worklist *wk; + struct newblk *newblk; struct indirdep *indirdep, *newindirdep; - struct bmsafemap *bmsafemap; struct allocindir *oldaip; struct freefrag *freefrag; - struct newblk *newblk; + struct mount *mp; ufs2_daddr_t blkno; + mp = UFSTOVFS(ip->i_ump); mtx_assert(&lk, MA_OWNED); if (bp->b_lblkno >= 0) panic("setup_allocindir_phase2: not indir blk"); - for (indirdep = NULL, newindirdep = NULL; ; ) { + for (freefrag = NULL, indirdep = NULL, newindirdep = NULL; ; ) { LIST_FOREACH(wk, &bp->b_dep, wk_list) { if (wk->wk_type != D_INDIRDEP) continue; @@ -2079,30 +4048,22 @@ static void } if (indirdep == NULL && newindirdep) { indirdep = newindirdep; + newindirdep = NULL; WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); - newindirdep = NULL; + if (newblk_lookup(mp, bp->b_blkno, 0, &newblk)) { + indirdep->ir_state |= ONDEPLIST; + LIST_INSERT_HEAD(&newblk->nb_indirdeps, + indirdep, ir_next); + } else + indirdep->ir_state |= DEPCOMPLETE; } if (indirdep) { - if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0, - &newblk) == 0) - panic("setup_allocindir: lost block"); - if (newblk->nb_state == DEPCOMPLETE) { - aip->ai_state |= DEPCOMPLETE; - aip->ai_buf = NULL; - } else { - bmsafemap = newblk->nb_bmsafemap; - aip->ai_buf = bmsafemap->sm_buf; - LIST_REMOVE(newblk, nb_deps); - LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd, - aip, ai_deps); - } - LIST_REMOVE(newblk, nb_hash); - free(newblk, M_NEWBLK); aip->ai_indirdep = indirdep; /* * Check to see if there is an existing dependency * for this block. 
If there is, merge the old - * dependency into the new one. + * dependency into the new one. This happens + * as a result of reallocblk only. */ if (aip->ai_oldblkno == 0) oldaip = NULL; @@ -2111,17 +4072,15 @@ static void LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next) if (oldaip->ai_offset == aip->ai_offset) break; - freefrag = NULL; - if (oldaip != NULL) { - if (oldaip->ai_newblkno != aip->ai_oldblkno) - panic("setup_allocindir_phase2: blkno"); - aip->ai_oldblkno = oldaip->ai_oldblkno; - freefrag = aip->ai_freefrag; - aip->ai_freefrag = oldaip->ai_freefrag; - oldaip->ai_freefrag = NULL; - free_allocindir(oldaip, NULL); - } + if (oldaip != NULL) + freefrag = allocindir_merge(aip, oldaip); LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); + KASSERT(aip->ai_offset >= 0 && + aip->ai_offset < NINDIR(ip->i_ump->um_fs), + ("setup_allocindir_phase2: Bad offset %d", + aip->ai_offset)); + KASSERT(indirdep->ir_savebp != NULL, + ("setup_allocindir_phase2 NULL ir_savebp")); if (ip->i_ump->um_fstype == UFS1) ((ufs1_daddr_t *)indirdep->ir_savebp->b_data) [aip->ai_offset] = aip->ai_oldblkno; @@ -2148,13 +4107,16 @@ static void } newindirdep = malloc(sizeof(struct indirdep), M_INDIRDEP, M_SOFTDEP_FLAGS); - workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, - UFSTOVFS(ip->i_ump)); + workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, mp); newindirdep->ir_state = ATTACHED; if (ip->i_ump->um_fstype == UFS1) newindirdep->ir_state |= UFS1FMT; + newindirdep->ir_saveddata = NULL; LIST_INIT(&newindirdep->ir_deplisthd); LIST_INIT(&newindirdep->ir_donehd); + LIST_INIT(&newindirdep->ir_writehd); + LIST_INIT(&newindirdep->ir_completehd); + LIST_INIT(&newindirdep->ir_jwork); if (bp->b_blkno == bp->b_lblkno) { ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp, NULL, NULL); @@ -2168,6 +4130,33 @@ static void } } +static struct freefrag * +allocindir_merge(aip, oldaip) + struct allocindir *aip; + struct allocindir *oldaip; +{ + struct freefrag *freefrag; + + if (oldaip->ai_newblkno != 
aip->ai_oldblkno) + panic("allocindir_merge: blkno"); + aip->ai_oldblkno = oldaip->ai_oldblkno; + freefrag = aip->ai_freefrag; + aip->ai_freefrag = oldaip->ai_freefrag; + oldaip->ai_freefrag = NULL; + KASSERT(freefrag != NULL, ("setup_allocindir_phase2: No freefrag")); + /* + * We can skip journaling for this freefrag and just complete + * any pending journal work for the allocindir that is being + * removed after the freefrag completes. + */ + cancel_jfreefrag(freefrag->ff_jfreefrag); + cancel_newblk(&oldaip->ai_block, &freefrag->ff_jwork, D_FREEFRAG); + LIST_REMOVE(oldaip, ai_next); + free_newblk(&oldaip->ai_block); + + return (freefrag); +} + /* * Block de-allocation dependencies. * @@ -2206,6 +4195,7 @@ softdep_setup_freeblocks(ip, length, flags) struct freeblks *freeblks; struct inodedep *inodedep; struct allocdirect *adp; + struct jfreeblk *jfreeblk; struct bufobj *bo; struct vnode *vp; struct buf *bp; @@ -2213,6 +4203,13 @@ softdep_setup_freeblocks(ip, length, flags) ufs2_daddr_t extblocks, datablocks; struct mount *mp; int i, delay, error; + ufs2_daddr_t blkno; + ufs_lbn_t tmpval; + ufs_lbn_t lbn; + long oldextsize; + long oldsize; + int frags; + int needj; fs = ip->i_fs; mp = UFSTOVFS(ip->i_ump); @@ -2221,32 +4218,52 @@ softdep_setup_freeblocks(ip, length, flags) freeblks = malloc(sizeof(struct freeblks), M_FREEBLKS, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&freeblks->fb_list, D_FREEBLKS, mp); + LIST_INIT(&freeblks->fb_jfreeblkhd); + LIST_INIT(&freeblks->fb_jwork); freeblks->fb_state = ATTACHED; freeblks->fb_uid = ip->i_uid; freeblks->fb_previousinum = ip->i_number; freeblks->fb_devvp = ip->i_devvp; + freeblks->fb_chkcnt = 0; ACQUIRE_LOCK(&lk); + /* + * If we're truncating a removed file that will never be written + * we don't need to journal the block frees. The canceled journals + * for the allocations will suffice. 
+ */ + (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + if ((inodedep->id_state & DEPCOMPLETE) == 0 && ip->i_nlink == 0) + needj = 0; + else + needj = 1; num_freeblkdep++; FREE_LOCK(&lk); extblocks = 0; if (fs->fs_magic == FS_UFS2_MAGIC) extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); datablocks = DIP(ip, i_blocks) - extblocks; - if ((flags & IO_NORMAL) == 0) { - freeblks->fb_oldsize = 0; - freeblks->fb_chkcnt = 0; - } else { - freeblks->fb_oldsize = ip->i_size; + if ((flags & IO_NORMAL) != 0) { + oldsize = ip->i_size; ip->i_size = 0; DIP_SET(ip, i_size, 0); freeblks->fb_chkcnt = datablocks; for (i = 0; i < NDADDR; i++) { - freeblks->fb_dblks[i] = DIP(ip, i_db[i]); + blkno = DIP(ip, i_db[i]); DIP_SET(ip, i_db[i], 0); + if (blkno == 0) + continue; + frags = sblksize(fs, oldsize, i); + frags = numfrags(fs, frags); + newfreework(freeblks, NULL, i, blkno, frags, needj); } - for (i = 0; i < NIADDR; i++) { - freeblks->fb_iblks[i] = DIP(ip, i_ib[i]); + for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; + i++, tmpval *= NINDIR(fs)) { + blkno = DIP(ip, i_ib[i]); DIP_SET(ip, i_ib[i], 0); + if (blkno) + newfreework(freeblks, NULL, -lbn - i, blkno, + fs->fs_frag, needj); + lbn += tmpval; } /* * If the file was removed, then the space being freed was @@ -2259,17 +4276,23 @@ softdep_setup_freeblocks(ip, length, flags) UFS_UNLOCK(ip->i_ump); } } - if ((flags & IO_EXT) == 0) { - freeblks->fb_oldextsize = 0; - } else { - freeblks->fb_oldextsize = ip->i_din2->di_extsize; + if ((flags & IO_EXT) != 0) { + oldextsize = ip->i_din2->di_extsize; ip->i_din2->di_extsize = 0; freeblks->fb_chkcnt += extblocks; for (i = 0; i < NXADDR; i++) { - freeblks->fb_eblks[i] = ip->i_din2->di_extb[i]; + blkno = ip->i_din2->di_extb[i]; ip->i_din2->di_extb[i] = 0; + if (blkno == 0) + continue; + frags = sblksize(fs, oldextsize, i); + frags = numfrags(fs, frags); + newfreework(freeblks, NULL, -1 - i, blkno, frags, + needj); } } + if (LIST_EMPTY(&freeblks->fb_jfreeblkhd)) 
+ needj = 0; DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - freeblks->fb_chkcnt); /* * Push the zero'ed inode to to its disk buffer so that we are free @@ -2304,7 +4327,9 @@ softdep_setup_freeblocks(ip, length, flags) */ delay = (inodedep->id_state & DEPCOMPLETE); if (delay) - WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list); + WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list); + else if (needj) + freeblks->fb_state |= DEPCOMPLETE | COMPLETE; /* * Because the file length has been truncated to zero, any * pending block allocation dependency structures associated @@ -2318,14 +4343,19 @@ softdep_setup_freeblocks(ip, length, flags) merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) - free_allocdirect(&inodedep->id_inoupdt, adp, delay); + cancel_allocdirect(&inodedep->id_inoupdt, adp, + freeblks, delay); } if (flags & IO_EXT) { merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt); while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0) - free_allocdirect(&inodedep->id_extupdt, adp, delay); + cancel_allocdirect(&inodedep->id_extupdt, adp, + freeblks, delay); } + LIST_FOREACH(jfreeblk, &freeblks->fb_jfreeblkhd, jf_deps) + add_to_journal(&jfreeblk->jf_list); + FREE_LOCK(&lk); bdwrite(bp); /* @@ -2349,9 +4379,9 @@ restart: BO_UNLOCK(bo); ACQUIRE_LOCK(&lk); (void) inodedep_lookup(mp, ip->i_number, 0, &inodedep); - deallocate_dependencies(bp, inodedep); + if (deallocate_dependencies(bp, inodedep, freeblks)) + bp->b_flags |= B_INVAL | B_NOCACHE; FREE_LOCK(&lk); - bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); BO_LOCK(bo); goto restart; @@ -2361,7 +4391,7 @@ restart: if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); - if(delay) { + if (delay) { freeblks->fb_state |= DEPCOMPLETE; /* * If the inode with zeroed block pointers is now on disk @@ -2376,11 +4406,11 @@ restart: FREE_LOCK(&lk); /* - * If the inode has never been written to disk (delay == 0), 
- * then we can process the freeblks now that we have deleted - * the dependencies. + * If the inode has never been written to disk (delay == 0) and + * we're not waiting on any journal writes, then we can process the + * freeblks now that we have deleted the dependencies. */ - if (!delay) + if (!delay && !needj) handle_workitem_freeblocks(freeblks, 0); } @@ -2389,19 +4419,22 @@ restart: * be reallocated to a new vnode. The buffer must be locked, thus, * no I/O completion operations can occur while we are manipulating * its associated dependencies. The mutex is held so that other I/O's - * associated with related dependencies do not occur. + * associated with related dependencies do not occur. Returns 1 if + * all dependencies were cleared, 0 otherwise. */ -static void -deallocate_dependencies(bp, inodedep) +static int +deallocate_dependencies(bp, inodedep, freeblks) struct buf *bp; struct inodedep *inodedep; + struct freeblks *freeblks; { struct worklist *wk; struct indirdep *indirdep; + struct newdirblk *newdirblk; struct allocindir *aip; struct pagedep *pagedep; + struct jremref *jremref; struct dirrem *dirrem; - struct diradd *dap; int i; mtx_assert(&lk, MA_OWNED); @@ -2424,15 +4457,15 @@ restart: * copy, allowing the safe copy to be freed and holding * on to the real copy for later use in indir_trunc. 
*/ + if (bp->b_lblkno >= 0 || + bp->b_blkno != indirdep->ir_savebp->b_lblkno) + panic("deallocate_dependencies: not indir"); if (indirdep->ir_state & GOINGAWAY) panic("deallocate_dependencies: already gone"); indirdep->ir_state |= GOINGAWAY; VFSTOUFS(bp->b_vp->v_mount)->um_numindirdeps += 1; while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0) - free_allocindir(aip, inodedep); - if (bp->b_lblkno >= 0 || - bp->b_blkno != indirdep->ir_savebp->b_lblkno) - panic("deallocate_dependencies: not indir"); + cancel_allocindir(aip, inodedep, freeblks); bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); WORKLIST_REMOVE(wk); @@ -2442,15 +4475,15 @@ restart: case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); /* - * None of the directory additions will ever be - * visible, so they can simply be tossed. + * There should be no directory add dependencies present + * as the directory could not be truncated until all + * children were removed. */ + KASSERT(LIST_FIRST(&pagedep->pd_pendinghd) == NULL, + ("deallocate_dependencies: pendinghd != NULL")); for (i = 0; i < DAHASHSZ; i++) - while ((dap = - LIST_FIRST(&pagedep->pd_diraddhd[i]))) - free_diradd(dap); - while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != 0) - free_diradd(dap); + KASSERT(LIST_FIRST(&pagedep->pd_diraddhd[i]) == NULL, + ("deallocate_dependencies: diraddhd != NULL")); /* * Copy any directory remove dependencies to the list * to be processed after the zero'ed inode is written. @@ -2458,28 +4491,33 @@ restart: * can be dumped directly onto the work list. */ LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) { + /* + * If there are any dirrems we wait for + * the journal write to complete and + * then restart the buf scan as the lock + * has been dropped. 
+ */ + while ((jremref = + LIST_FIRST(&dirrem->dm_jremrefhd)) + != NULL) { + jwait(&jremref->jr_list); + return (0); + } LIST_REMOVE(dirrem, dm_next); dirrem->dm_dirinum = pagedep->pd_ino; if (inodedep == NULL || (inodedep->id_state & ALLCOMPLETE) == - ALLCOMPLETE) + ALLCOMPLETE) { + dirrem->dm_state |= COMPLETE; add_to_worklist(&dirrem->dm_list); - else + } else WORKLIST_INSERT(&inodedep->id_bufwait, &dirrem->dm_list); } if ((pagedep->pd_state & NEWBLOCK) != 0) { - LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list) - if (wk->wk_type == D_NEWDIRBLK && - WK_NEWDIRBLK(wk)->db_pagedep == - pagedep) - break; - if (wk != NULL) { - WORKLIST_REMOVE(wk); - free_newdirblk(WK_NEWDIRBLK(wk)); - } else - panic("deallocate_dependencies: " - "lost pagedep"); + newdirblk = pagedep->pd_newdirblk; + WORKLIST_REMOVE(&newdirblk->db_list); + free_newdirblk(newdirblk); } WORKLIST_REMOVE(&pagedep->pd_list); LIST_REMOVE(pagedep, pd_hash); @@ -2487,7 +4525,8 @@ restart: continue; case D_ALLOCINDIR: - free_allocindir(WK_ALLOCINDIR(wk), inodedep); + aip = WK_ALLOCINDIR(wk); + cancel_allocindir(aip, inodedep, freeblks); continue; case D_ALLOCDIRECT: @@ -2502,46 +4541,133 @@ restart: /* NOTREACHED */ } } + + return (1); } /* - * Free an allocdirect. Generate a new freefrag work request if appropriate. - * This routine must be called with splbio interrupts blocked. + * An allocdirect is being canceled due to a truncate. We must make sure + * the journal entry is released in concert with the blkfree that releases + * the storage. Completed journal entries must not be released until the + * space is no longer pointed to by the inode or in the bitmap. 
*/ static void -free_allocdirect(adphead, adp, delay) +cancel_allocdirect(adphead, adp, freeblks, delay) struct allocdirectlst *adphead; struct allocdirect *adp; + struct freeblks *freeblks; int delay; { + struct freework *freework; + struct newblk *newblk; + struct worklist *wk; + ufs_lbn_t lbn; + + TAILQ_REMOVE(adphead, adp, ad_next); + newblk = (struct newblk *)adp; + if (newblk->nb_state & ONWORKLIST) + WORKLIST_REMOVE(&newblk->nb_list); + /* + * If the journal hasn't been written the jnewblk must be passed + * to the call to ffs_freeblk that reclaims the space. We accomplish + * this by linking the journal dependency into the freework to be + * freed when freework_freeblock() is called. If the journal has + * been written we can simply reclaim the journal space when the + * freeblks work is complete. + */ + if (newblk->nb_jnewblk == NULL) { + cancel_newblk(newblk, &freeblks->fb_jwork, D_FREEBLKS); + goto found; + } + lbn = newblk->nb_jnewblk->jn_lbn; + /* + * Find the correct freework structure so it releases the canceled + * journal when the bitmap is cleared. This preserves rollback + * until the allocation is reverted. + */ + LIST_FOREACH(wk, &freeblks->fb_freeworkhd, wk_list) { + freework = WK_FREEWORK(wk); + if (freework->fw_lbn != lbn) + continue; + cancel_newblk(newblk, &freework->fw_jwork, D_FREEWORK); + goto found; + } + panic("cancel_allocdirect: Freework not found for lbn %jd\n", lbn); +found: + if (delay) + WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, + &newblk->nb_list); + else + free_newblk(newblk); + return; +} + + +static void +cancel_newblk(newblk, wkhd, type) + struct newblk *newblk; + struct workhead *wkhd; + short type; +{ + struct indirdep *indirdep; + + while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) { + indirdep->ir_state &= ~ONDEPLIST; + LIST_REMOVE(indirdep, ir_next); + } + /* + * If the journal entry hasn't been written we hold onto the dep + * until it is safe to free along with the other journal work. 
+ */ + if (newblk->nb_jnewblk != NULL) { + cancel_jnewblk(newblk->nb_jnewblk, wkhd); + newblk->nb_jnewblk = NULL; + } + if (!LIST_EMPTY(&newblk->nb_jwork)) + jwork_move(type, __LINE__, wkhd, &newblk->nb_jwork); +} + +/* + * Free a newblk. Generate a new freefrag work request if appropriate. + * This must be called after the inode pointer and any direct block pointers + * are valid or fully removed via truncate or frag extension. + */ +static void +free_newblk(newblk) + struct newblk *newblk; +{ + struct indirdep *indirdep; struct newdirblk *newdirblk; + struct freefrag *freefrag; struct worklist *wk; mtx_assert(&lk, MA_OWNED); - if ((adp->ad_state & DEPCOMPLETE) == 0) - LIST_REMOVE(adp, ad_deps); - TAILQ_REMOVE(adphead, adp, ad_next); - if ((adp->ad_state & COMPLETE) == 0) - WORKLIST_REMOVE(&adp->ad_list); - if (adp->ad_freefrag != NULL) { - if (delay) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, - &adp->ad_freefrag->ff_list); - else - add_to_worklist(&adp->ad_freefrag->ff_list); + if (newblk->nb_state & ONDEPLIST) + LIST_REMOVE(newblk, nb_deps); + if (newblk->nb_state & ONWORKLIST) + WORKLIST_REMOVE(&newblk->nb_list); + LIST_REMOVE(newblk, nb_hash); + if ((freefrag = newblk->nb_freefrag) != NULL) { + freefrag->ff_state |= COMPLETE; + if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(&freefrag->ff_list); } - if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) { + if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) { newdirblk = WK_NEWDIRBLK(wk); WORKLIST_REMOVE(&newdirblk->db_list); - if (!LIST_EMPTY(&adp->ad_newdirblk)) - panic("free_allocdirect: extra newdirblk"); - if (delay) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, - &newdirblk->db_list); - else - free_newdirblk(newdirblk); + if (!LIST_EMPTY(&newblk->nb_newdirblk)) + panic("free_newblk: extra newdirblk"); + free_newdirblk(newdirblk); } - WORKITEM_FREE(adp, D_ALLOCDIRECT); + while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) { + indirdep->ir_state |= DEPCOMPLETE; + 
indirdep_complete(indirdep); + } + KASSERT(newblk->nb_jnewblk == NULL, + ("free_newblk; jnewblk %p still attached", newblk->nb_jnewblk)); + handle_jwork(&newblk->nb_jwork); + newblk->nb_list.wk_type = D_NEWBLK; + WORKITEM_FREE(newblk, D_NEWBLK); } /* @@ -2554,6 +4680,7 @@ free_newdirblk(newdirblk) { struct pagedep *pagedep; struct diradd *dap; + struct worklist *wk; int i; mtx_assert(&lk, MA_OWNED); @@ -2571,7 +4698,7 @@ free_newdirblk(newdirblk) pagedep->pd_state &= ~NEWBLOCK; if ((pagedep->pd_state & ONWORKLIST) == 0) while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) - free_diradd(dap); + free_diradd(dap, NULL); /* * If no dependencies remain, the pagedep will be freed. */ @@ -2579,9 +4706,16 @@ free_newdirblk(newdirblk) if (!LIST_EMPTY(&pagedep->pd_diraddhd[i])) break; if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) { + KASSERT(LIST_FIRST(&pagedep->pd_dirremhd) == NULL, + ("free_newdirblk: Freeing non-free pagedep %p", pagedep)); LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); } + /* Should only ever be one item in the list. */ + while ((wk = LIST_FIRST(&newdirblk->db_mkdir)) != NULL) { + WORKLIST_REMOVE(wk); + handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY); + } WORKITEM_FREE(newdirblk, D_NEWDIRBLK); } @@ -2608,6 +4742,7 @@ softdep_freefile(pvp, ino, mode) freefile->fx_mode = mode; freefile->fx_oldinum = ino; freefile->fx_devvp = ip->i_devvp; + LIST_INIT(&freefile->fx_jwork); if ((ip->i_flag & IN_SPACECOUNTED) == 0) { UFS_LOCK(ip->i_ump); ip->i_fs->fs_pendinginodes += 1; @@ -2618,15 +4753,24 @@ softdep_freefile(pvp, ino, mode) * If the inodedep does not exist, then the zero'ed inode has * been written to disk. If the allocated inode has never been * written to disk, then the on-disk inode is zero'ed. In either - * case we can free the file immediately. + * case we can free the file immediately. 
If the journal was + * canceled before being written the inode will never make it to + * disk and we must send the canceled journal entries to + * ffs_freefile() to be cleared in conjunction with the bitmap. + * Any blocks waiting on the inode to write can be safely freed + * here as it will never be written. */ ACQUIRE_LOCK(&lk); - if (inodedep_lookup(pvp->v_mount, ino, 0, &inodedep) == 0 || - check_inode_unwritten(inodedep)) { + inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); + if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0 && + !LIST_EMPTY(&inodedep->id_jaddrefhd)) + handle_bufwait(inodedep, &freefile->fx_jwork); + if (inodedep == NULL || check_inode_unwritten(inodedep)) { FREE_LOCK(&lk); handle_workitem_freefile(freefile); return; } + inodedep->id_state |= GOINGAWAY; WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); FREE_LOCK(&lk); if (ip->i_number == ino) @@ -2654,6 +4798,19 @@ check_inode_unwritten(inodedep) { mtx_assert(&lk, MA_OWNED); + /* + * The inode is unwritten but we have some canceled jaddrefs still, + * the inode will never be written but it is not yet safe to be + * freed either. Return 1 so callers don't place items on the + * bufwait/inowait lists that will never be written. + * + * XXX This breaks if the jaddrefhd is not canceled but is UNDONE + * and complete. + */ + if ((inodedep->id_state & DEPCOMPLETE) == 0 && + !LIST_EMPTY(&inodedep->id_jaddrefhd)) + return (1); + if ((inodedep->id_state & DEPCOMPLETE) != 0 || !LIST_EMPTY(&inodedep->id_pendinghd) || !LIST_EMPTY(&inodedep->id_bufwait) || @@ -2662,9 +4819,9 @@ check_inode_unwritten(inodedep) !TAILQ_EMPTY(&inodedep->id_newinoupdt) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || + inodedep->id_mkdiradd != NULL || inodedep->id_nlinkdelta != 0) return (0); - /* * Another process might be in initiate_write_inodeblock_ufs[12] * trying to allocate memory without holding "Softdep Lock". 
@@ -2673,9 +4830,11 @@ check_inode_unwritten(inodedep) inodedep->id_savedino1 == NULL) return (0); + if (inodedep->id_state & ONDEPLIST) + LIST_REMOVE(inodedep, id_deps); + inodedep->id_state &= ~ONDEPLIST; inodedep->id_state |= ALLCOMPLETE; - LIST_REMOVE(inodedep, id_deps); - inodedep->id_buf = NULL; + inodedep->id_bmsafemap = NULL; if (inodedep->id_state & ONWORKLIST) WORKLIST_REMOVE(&inodedep->id_list); if (inodedep->id_savedino1 != NULL) { @@ -2701,12 +4860,17 @@ free_inodedep(inodedep) !LIST_EMPTY(&inodedep->id_pendinghd) || !LIST_EMPTY(&inodedep->id_bufwait) || !LIST_EMPTY(&inodedep->id_inowait) || + !LIST_EMPTY(&inodedep->id_jaddrefhd) || !TAILQ_EMPTY(&inodedep->id_inoupdt) || !TAILQ_EMPTY(&inodedep->id_newinoupdt) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || - inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL) + inodedep->id_mkdiradd != NULL || + inodedep->id_nlinkdelta != 0 || + inodedep->id_savedino1 != NULL) return (0); + if (inodedep->id_state & ONDEPLIST) + LIST_REMOVE(inodedep, id_deps); LIST_REMOVE(inodedep, id_hash); WORKITEM_FREE(inodedep, D_INODEDEP); num_inodedep -= 1; @@ -2714,6 +4878,119 @@ free_inodedep(inodedep) } /* + * Free the block referenced by a freework structure. The parent freeblks + * structure is released and completed when the final cg bitmap reaches + * the disk. This routine may be freeing a jnewblk which never made it to + * disk in which case we do not have to wait as the operation is undone + * in memory immediately. 
+ */ +static void +freework_freeblock(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct ufsmount *ump; + struct workhead wkhd; + struct fs *fs; + int complete; + int pending; + int bsize; + + freeblks = freework->fw_freeblks; + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + fs = ump->um_fs; + complete = 0; + LIST_INIT(&wkhd); + /* + * If we are canceling an existing jnewblk pass it to the free + * routine, otherwise pass the freeblk which will ultimately + * release the freeblks + */ + if (!LIST_EMPTY(&freework->fw_jwork)) { + LIST_SWAP(&wkhd, &freework->fw_jwork, worklist, wk_list); + complete = 1; + } else + WORKLIST_INSERT_UNLOCKED(&wkhd, &freework->fw_list); + bsize = lfragtosize(fs, freework->fw_frags); + pending = btodb(bsize); + ACQUIRE_LOCK(&lk); + freeblks->fb_chkcnt -= pending; + FREE_LOCK(&lk); + /* + * extattr blocks don't show up in pending blocks. XXX why? + */ + if (freework->fw_lbn >= 0 || freework->fw_lbn <= -NDADDR) { + UFS_LOCK(ump); + fs->fs_pendingblocks -= pending; + UFS_UNLOCK(ump); + } + ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, + bsize, freeblks->fb_previousinum, &wkhd); + if (complete == 0) + return; + /* + * The jnewblk will be discarded and the bits in the map never + * made it to disk. We can immediately free the freeblk. + */ + ACQUIRE_LOCK(&lk); + handle_written_freework(freework); + FREE_LOCK(&lk); +} + +/* + * Start, continue, or finish the process of freeing an indirect block tree. + * The free operation may be paused at any point with fw_off containing the + * offset to restart from. This enables us to implement some flow control + * for large truncates which may fan out and generate a huge number of + * dependencies. 
+ */ +static void +handle_workitem_indirblk(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct ufsmount *ump; + struct fs *fs; + + + freeblks = freework->fw_freeblks; + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + fs = ump->um_fs; + if (freework->fw_off == NINDIR(fs)) + freework_freeblock(freework); + else + indir_trunc(freework, fsbtodb(fs, freework->fw_blkno), + freework->fw_lbn); +} + +/* + * Called when a freework structure attached to a cg buf is written. The + * ref on either the parent or the freeblks structure is released and + * either may be added to the worklist if it is the final ref. + */ +static void +handle_written_freework(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct freework *parent; + + freeblks = freework->fw_freeblks; + parent = freework->fw_parent; + if (parent) { + if (--parent->fw_ref != 0) + parent = NULL; + freeblks = NULL; + } else if (--freeblks->fb_ref != 0) + freeblks = NULL; + WORKITEM_FREE(freework, D_FREEWORK); + if (freeblks) + add_to_worklist(&freeblks->fb_list); + if (parent) + add_to_worklist(&parent->fw_list); +} + +/* * This workitem routine performs the block de-allocation. * The workitem is added to the pending list after the updated * inode block has been written to disk. 
As mentioned above, @@ -2726,99 +5003,79 @@ handle_workitem_freeblocks(freeblks, flags) struct freeblks *freeblks; int flags; { + struct freework *freework; + struct worklist *wk; + + KASSERT(LIST_EMPTY(&freeblks->fb_jfreeblkhd), + ("handle_workitem_freeblocks: Journal entries not written.")); + if (LIST_EMPTY(&freeblks->fb_freeworkhd)) { + handle_complete_freeblocks(freeblks); + return; + } + freeblks->fb_ref++; + while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) { + KASSERT(wk->wk_type == D_FREEWORK, + ("handle_workitem_freeblocks: Unknown type %s", + TYPENAME(wk->wk_type))); + WORKLIST_REMOVE_UNLOCKED(wk); + freework = WK_FREEWORK(wk); + if (freework->fw_lbn <= -NDADDR) + handle_workitem_indirblk(freework); + else + freework_freeblock(freework); + } + ACQUIRE_LOCK(&lk); + if (--freeblks->fb_ref != 0) + freeblks = NULL; + FREE_LOCK(&lk); + if (freeblks) + handle_complete_freeblocks(freeblks); +} + +/* + * Once all of the freework workitems are complete we can retire the + * freeblocks dependency and any journal work awaiting completion. This + * can not be called until all other dependencies are stable on disk. + */ +static void +handle_complete_freeblocks(freeblks) + struct freeblks *freeblks; +{ struct inode *ip; struct vnode *vp; struct fs *fs; struct ufsmount *ump; - int i, nblocks, level, bsize; - ufs2_daddr_t bn, blocksreleased = 0; - int error, allerror = 0; - ufs_lbn_t baselbns[NIADDR], tmpval; - int fs_pendingblocks; + int flags; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; - fs_pendingblocks = 0; - tmpval = 1; - baselbns[0] = NDADDR; - for (i = 1; i < NIADDR; i++) { - tmpval *= NINDIR(fs); - baselbns[i] = baselbns[i - 1] + tmpval; - } - nblocks = btodb(fs->fs_bsize); - blocksreleased = 0; + flags = LK_NOWAIT; + /* - * Release all extended attribute blocks or frags. 
- */ - if (freeblks->fb_oldextsize > 0) { - for (i = (NXADDR - 1); i >= 0; i--) { - if ((bn = freeblks->fb_eblks[i]) == 0) - continue; - bsize = sblksize(fs, freeblks->fb_oldextsize, i); - ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, bsize, - freeblks->fb_previousinum); - blocksreleased += btodb(bsize); - } - } - /* - * Release all data blocks or frags. - */ - if (freeblks->fb_oldsize > 0) { - /* - * Indirect blocks first. - */ - for (level = (NIADDR - 1); level >= 0; level--) { - if ((bn = freeblks->fb_iblks[level]) == 0) - continue; - if ((error = indir_trunc(freeblks, fsbtodb(fs, bn), - level, baselbns[level], &blocksreleased)) != 0) - allerror = error; - ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, - fs->fs_bsize, freeblks->fb_previousinum); - fs_pendingblocks += nblocks; - blocksreleased += nblocks; - } - /* - * All direct blocks or frags. - */ - for (i = (NDADDR - 1); i >= 0; i--) { - if ((bn = freeblks->fb_dblks[i]) == 0) - continue; - bsize = sblksize(fs, freeblks->fb_oldsize, i); - ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, bsize, - freeblks->fb_previousinum); - fs_pendingblocks += btodb(bsize); - blocksreleased += btodb(bsize); - } - } - UFS_LOCK(ump); - fs->fs_pendingblocks -= fs_pendingblocks; - UFS_UNLOCK(ump); - /* * If we still have not finished background cleanup, then check * to see if the block count needs to be adjusted. 
*/ - if (freeblks->fb_chkcnt != blocksreleased && - (fs->fs_flags & FS_UNCLEAN) != 0 && + if (freeblks->fb_chkcnt != 0 && (fs->fs_flags & FS_UNCLEAN) != 0 && ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_previousinum, - (flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ) - == 0) { + (flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ) == 0) { ip = VTOI(vp); - DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + \ - freeblks->fb_chkcnt - blocksreleased); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + freeblks->fb_chkcnt); ip->i_flag |= IN_CHANGE; vput(vp); } #ifdef INVARIANTS - if (freeblks->fb_chkcnt != blocksreleased && + if (freeblks->fb_chkcnt != 0 && ((fs->fs_flags & FS_UNCLEAN) == 0 || (flags & LK_NOWAIT) != 0)) printf("handle_workitem_freeblocks: block count\n"); - if (allerror) - softdep_error("handle_workitem_freeblks", allerror); #endif /* INVARIANTS */ ACQUIRE_LOCK(&lk); + /* + * All of the freeblock deps must be complete prior to this call + * so it's now safe to complete earlier outstanding journal entries. + */ + handle_jwork(&freeblks->fb_jwork); WORKITEM_FREE(freeblks, D_FREEBLKS); num_freeblkdep--; FREE_LOCK(&lk); @@ -2830,29 +5087,39 @@ handle_workitem_freeblocks(freeblks, flags) * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. 
*/ -static int -indir_trunc(freeblks, dbn, level, lbn, countp) - struct freeblks *freeblks; +static void +indir_trunc(freework, dbn, lbn) + struct freework *freework; ufs2_daddr_t dbn; - int level; ufs_lbn_t lbn; - ufs2_daddr_t *countp; { + struct workhead wkhd; + struct jnewblk *jnewblk; + struct freeblks *freeblks; struct buf *bp; struct fs *fs; + struct worklist *wkn; struct worklist *wk; struct indirdep *indirdep; struct ufsmount *ump; ufs1_daddr_t *bap1 = 0; - ufs2_daddr_t nb, *bap2 = 0; + ufs2_daddr_t nb, nnb, *bap2 = 0; ufs_lbn_t lbnadd; int i, nblocks, ufs1fmt; - int error, allerror = 0; int fs_pendingblocks; + int freedeps; + int level; + int cnt; + LIST_INIT(&wkhd); + level = lbn_level(lbn); + if (level == -1) + panic("indir_trunc: Invalid lbn %jd\n", lbn); + freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; fs_pendingblocks = 0; + freedeps = 0; lbnadd = 1; for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); @@ -2880,23 +5147,34 @@ handle_workitem_freeblocks(freeblks, flags) (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp || (indirdep->ir_state & GOINGAWAY) == 0) panic("indir_trunc: lost indirdep"); - WORKLIST_REMOVE(wk); - WORKITEM_FREE(indirdep, D_INDIRDEP); + LIST_SWAP(&wkhd, &indirdep->ir_jwork, worklist, wk_list); + free_indirdep(indirdep); if (!LIST_EMPTY(&bp->b_dep)) panic("indir_trunc: dangling dep"); ump->um_numindirdeps -= 1; FREE_LOCK(&lk); } else { + /* + * If the bp exists we will deadlock against it below. Wait + * for handle_workitem_indirdep() to run and release it. We + * have no way of finding the indirdep to cancel the workitem + * for now. This operation will simply restart at the end + * of the list after the indirdep has been released. 
+ */ + if (bp) { + add_to_worklist(&freework->fw_list); + FREE_LOCK(&lk); + return; + } #ifdef notyet if (bp) brelse(bp); #endif FREE_LOCK(&lk); - error = bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, - NOCRED, &bp); - if (error) { + if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, + NOCRED, &bp) != 0) { brelse(bp); - return (error); + return; } } /* @@ -2909,57 +5187,148 @@ handle_workitem_freeblocks(freeblks, flags) ufs1fmt = 0; bap2 = (ufs2_daddr_t *)bp->b_data; } - nblocks = btodb(fs->fs_bsize); - for (i = NINDIR(fs) - 1; i >= 0; i--) { - if (ufs1fmt) + /* + * Reclaim indirect blocks which never made it to disk. + */ + cnt = 0; + LIST_FOREACH_SAFE(wk, &wkhd, wk_list, wkn) { + struct workhead freewk; + if (wk->wk_type != D_JNEWBLK) + continue; + WORKLIST_REMOVE_UNLOCKED(wk); + LIST_INIT(&freewk); + WORKLIST_INSERT_UNLOCKED(&freewk, wk); + jnewblk = WK_JNEWBLK(wk); + if (jnewblk->jn_lbn > 0) + i = (jnewblk->jn_lbn - -lbn) / lbnadd; + else + i = (jnewblk->jn_lbn - lbn) / lbnadd; + KASSERT(i >= 0 && i < NINDIR(fs), + ("indir_trunc: Index out of range %d parent %jd lbn %jd", + i, lbn, jnewblk->jn_lbn)); + /* Clear the pointer so it isn't found below. */ + if (ufs1fmt) { nb = bap1[i]; - else + bap1[i] = 0; + } else { nb = bap2[i]; + bap2[i] = 0; + } + KASSERT(nb == jnewblk->jn_blkno, + ("indir_trunc: Block mismatch %jd != %jd", + nb, jnewblk->jn_blkno)); + ffs_blkfree(ump, fs, freeblks->fb_devvp, jnewblk->jn_blkno, + fs->fs_bsize, freeblks->fb_previousinum, &freewk); + cnt++; + } + ACQUIRE_LOCK(&lk); + freework->fw_ref += NINDIR(fs) + 1; + /* Any remaining journal work can be completed with freeblks. */ + jwork_move(D_FREEBLKS, __LINE__, &freeblks->fb_jwork, &wkhd); + FREE_LOCK(&lk); + nblocks = btodb(fs->fs_bsize); + if (ufs1fmt) + nb = bap1[0]; + else + nb = bap2[0]; + /* + * Reclaim on disk blocks. 
+ */ + for (i = freework->fw_off; i < NINDIR(fs); i++, nb = nnb) { + if (i != NINDIR(fs) - 1) { + if (ufs1fmt) + nnb = bap1[i+1]; + else + nnb = bap2[i+1]; + } else + nnb = 0; if (nb == 0) continue; + cnt++; if (level != 0) { - if ((error = indir_trunc(freeblks, fsbtodb(fs, nb), - level - 1, lbn + (i * lbnadd), countp)) != 0) - allerror = error; + struct freework *nfreework; + ufs_lbn_t nlbn; + + nlbn = (lbn + 1) - (i * lbnadd); + nfreework = newfreework(freeblks, freework, nlbn, nb, + fs->fs_frag, 0); + freedeps++; + indir_trunc(nfreework, fsbtodb(fs, nb), nlbn); + } else { + struct freedep *freedep; + + /* + * Attempt to aggregate freedep dependencies for + * all blocks being released to the same CG. + */ + LIST_INIT(&wkhd); + if (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb))) { + freedep = newfreedep(freework); + WORKLIST_INSERT_UNLOCKED(&wkhd, + &freedep->fd_list); + freedeps++; + } + ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, + fs->fs_bsize, freeblks->fb_previousinum, &wkhd); + fs_pendingblocks += nblocks; } - ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, fs->fs_bsize, - freeblks->fb_previousinum); - fs_pendingblocks += nblocks; - *countp += nblocks; } - UFS_LOCK(ump); - fs->fs_pendingblocks -= fs_pendingblocks; - UFS_UNLOCK(ump); + ACQUIRE_LOCK(&lk); + freework->fw_off = i; + if (level == 0) + fs_pendingblocks = (nblocks * cnt); + freework->fw_ref += freedeps; + freework->fw_ref -= NINDIR(fs) + 1; + if (freework->fw_ref != 0) + freework = NULL; + FREE_LOCK(&lk); + if (fs_pendingblocks) { + ACQUIRE_LOCK(&lk); + freeblks->fb_chkcnt -= fs_pendingblocks; + FREE_LOCK(&lk); + UFS_LOCK(ump); + fs->fs_pendingblocks -= fs_pendingblocks; + UFS_UNLOCK(ump); + } bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); - return (allerror); + if (freework) + handle_workitem_indirblk(freework); + return; } /* - * Free an allocindir. - * This routine must be called with splbio interrupts blocked. + * Cancel an allocindir when it is removed via truncation. 
*/ static void -free_allocindir(aip, inodedep) +cancel_allocindir(aip, inodedep, freeblks) struct allocindir *aip; struct inodedep *inodedep; + struct freeblks *freeblks; { - struct freefrag *freefrag; + struct newblk *newblk; - mtx_assert(&lk, MA_OWNED); - if ((aip->ai_state & DEPCOMPLETE) == 0) - LIST_REMOVE(aip, ai_deps); - if (aip->ai_state & ONWORKLIST) - WORKLIST_REMOVE(&aip->ai_list); + /* + * If the journal hasn't been written the jnewblk must be passed + * to the call to ffs_freeblk that reclaims the space. We accomplish + * this by linking the journal dependency into the indirdep to be + * freed when indir_trunc() is called. If the journal has already + * been written we can simply reclaim the journal space when the + * freeblks work is complete. + */ LIST_REMOVE(aip, ai_next); - if ((freefrag = aip->ai_freefrag) != NULL) { - if (inodedep == NULL) - add_to_worklist(&freefrag->ff_list); - else - WORKLIST_INSERT(&inodedep->id_bufwait, - &freefrag->ff_list); - } - WORKITEM_FREE(aip, D_ALLOCINDIR); + newblk = (struct newblk *)aip; + if (newblk->nb_state & ONWORKLIST) + WORKLIST_REMOVE(&newblk->nb_list); + if (newblk->nb_jnewblk == NULL) + cancel_newblk(newblk, &freeblks->fb_jwork, D_FREEBLKS); + else + cancel_newblk(newblk, &aip->ai_indirdep->ir_jwork, + D_ALLOCINDIR); + if (inodedep->id_state & DEPCOMPLETE) + WORKLIST_INSERT(&inodedep->id_bufwait, &newblk->nb_list); + else + free_newblk(newblk); } /* @@ -2998,11 +5367,14 @@ softdep_setup_directory_add(bp, dp, diroffset, new ufs_lbn_t lbn; /* block in directory containing new entry */ struct fs *fs; struct diradd *dap; - struct allocdirect *adp; + struct newblk *newblk; struct pagedep *pagedep; struct inodedep *inodedep; struct newdirblk *newdirblk = 0; + struct newdirblk *mknewdirblk = 0; struct mkdir *mkdir1, *mkdir2; + struct worklist *wk; + struct jaddref *jaddref; struct mount *mp; /* @@ -3013,6 +5385,7 @@ softdep_setup_directory_add(bp, dp, diroffset, new bdwrite(newdirbp); return (0); } + mkdir1 = 
NULL; mp = UFSTOVFS(dp->i_ump); fs = dp->i_fs; lbn = lblkno(fs, diroffset); @@ -3023,25 +5396,31 @@ softdep_setup_directory_add(bp, dp, diroffset, new dap->da_offset = offset; dap->da_newinum = newinum; dap->da_state = ATTACHED; - if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) { + LIST_INIT(&dap->da_jwork); + if (isnewblk && fragoff(fs, diroffset) == 0) { newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK, M_SOFTDEP_FLAGS); workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp); + LIST_INIT(&newdirblk->db_mkdir); } if (newdirbp == NULL) { dap->da_state |= DEPCOMPLETE; ACQUIRE_LOCK(&lk); } else { + mknewdirblk = malloc(sizeof(struct newdirblk), + M_NEWDIRBLK, M_SOFTDEP_FLAGS); + workitem_alloc(&mknewdirblk->db_list, D_NEWDIRBLK, mp); + LIST_INIT(&mknewdirblk->db_mkdir); dap->da_state |= MKDIR_BODY | MKDIR_PARENT; mkdir1 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS); workitem_alloc(&mkdir1->md_list, D_MKDIR, mp); - mkdir1->md_state = MKDIR_BODY; + mkdir1->md_state = ATTACHED | MKDIR_BODY; mkdir1->md_diradd = dap; mkdir2 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS); workitem_alloc(&mkdir2->md_list, D_MKDIR, mp); - mkdir2->md_state = MKDIR_PARENT; + mkdir2->md_state = ATTACHED | MKDIR_PARENT; mkdir2->md_diradd = dap; /* * Dependency on "." and ".." being written to disk. @@ -3049,85 +5428,114 @@ softdep_setup_directory_add(bp, dp, diroffset, new mkdir1->md_buf = newdirbp; ACQUIRE_LOCK(&lk); LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs); - WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list); - FREE_LOCK(&lk); - bdwrite(newdirbp); /* - * Dependency on link count increase for parent directory + * We must link the pagedep, allocdirect, and newdirblk for + * the initial file page so the pointer to the new directory + * is not written until the directory contents are live and + * any subsequent additions are not marked live until the + * block is reachable via the inode. 
*/ - ACQUIRE_LOCK(&lk); - if (inodedep_lookup(mp, dp->i_number, 0, &inodedep) == 0 - || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { - dap->da_state &= ~MKDIR_PARENT; - WORKITEM_FREE(mkdir2, D_MKDIR); - } else { - LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); - WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); - } + if (pagedep_lookup(mp, newinum, 0, 0, &pagedep) == 0) + panic("softdep_setup_directory_add: " + "lost mkdir pagedep"); + LIST_FOREACH(wk, &newdirbp->b_dep, wk_list) + if (wk->wk_type == D_ALLOCDIRECT) + break; + if (wk == NULL) + panic("softdep_setup_directory_add: lost mkdir adp"); + newblk = WK_NEWBLK(wk); + pagedep->pd_state |= NEWBLOCK; + pagedep->pd_newdirblk = mknewdirblk; + mknewdirblk->db_pagedep = pagedep; + WORKLIST_INSERT(&newblk->nb_newdirblk, &mknewdirblk->db_list); + WORKLIST_INSERT(&mknewdirblk->db_mkdir, &mkdir1->md_list); + /* + * Look up the inodedep for the parent directory so that we + * can link mkdir2 into the pending dotdot jaddref or + * the inode write if there is none. If the inode is + * ALLCOMPLETE and no jaddref is present all dependencies have + * been satisfied and mkdir2 can be freed. + */ + if (inodedep_lookup(mp, dp->i_number, 0, &inodedep) == 0) + panic("softdep_setup_directory_add: lost parent"); + jaddref = LIST_FIRST(&inodedep->id_jaddrefhd); + KASSERT(jaddref != NULL && jaddref->ja_parent == newinum && + (jaddref->ja_state & MKDIR_PARENT), + ("softdep_setup_directory_add: bad dotdot jaddref %p", + jaddref)); + LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); + mkdir2->md_jaddref = jaddref; + jaddref->ja_mkdir = mkdir2; + /* + * It is important that this journal entry is added prior + * to the dot entry since it writes both the dot and dotdot + * links. This entry must be visible to the recovery + * operation for it to correctly adjust the parent's link. + */ + add_to_journal(&jaddref->ja_list); } /* * Link into parent directory pagedep to await its being written. 
*/ - if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) + if (pagedep_lookup(mp, dp->i_number, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + dap->da_pagedep = pagedep; LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); /* - * Link into its inodedep. Put it on the id_bufwait list if the inode - * is not yet written. If it is written, do the post-inode write - * processing to put it on the id_pendinghd list. + * Link the diradd into the jaddref so it may be completed after + * the journal entry is written. The directory offset was not + * known until now so it must still exist as the first element + * of the jaddrefhd queue. */ - (void) inodedep_lookup(mp, newinum, DEPALLOC, &inodedep); - if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) - diradd_inode_written(dap, inodedep); - else - WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); - if (isnewblk) { + if (inodedep_lookup(mp, newinum, 0, &inodedep) == 0) + panic("softdep_setup_directory_add: Lost inodedep"); + jaddref = LIST_FIRST(&inodedep->id_jaddrefhd); + KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, + ("softdep_setup_directory_add: bad jaddref %p", jaddref)); + jaddref->ja_diroff = diroffset; + jaddref->ja_diradd = dap; + add_to_journal(&jaddref->ja_list); + /* + * If we are adding a new directory remember this diradd so that if + * we rename it we can keep the dot and dotdot dependencies. If + * we are adding a new name for an inode that has a mkdiradd we + * must be in rename and we have to move the dot and dotdot + * dependencies to this new name. The old name is being orphaned + * soon. 
+ */ + if (mkdir1 != NULL) { + if (inodedep->id_mkdiradd != NULL) + panic("softdep_setup_directory_add: Existing mkdir"); + inodedep->id_mkdiradd = dap; + jaddref = LIST_NEXT(jaddref, ja_inodeps); + KASSERT(jaddref != NULL && + jaddref->ja_ino == jaddref->ja_parent && + (jaddref->ja_state & MKDIR_BODY), + ("softdep_setup_directory_add: bad dot jaddref %p", + jaddref)); + mkdir1->md_jaddref = jaddref; + jaddref->ja_mkdir = mkdir1; + add_to_journal(&jaddref->ja_list); + } else if (inodedep->id_mkdiradd) + merge_diradd(inodedep, dap); + if (newdirblk) { /* - * Directories growing into indirect blocks are rare - * enough and the frequency of new block allocation - * in those cases even more rare, that we choose not - * to bother tracking them. Rather we simply force the - * new directory entry to disk. + * There is nothing to do if we are already tracking + * this block. */ - if (lbn >= NDADDR) { - FREE_LOCK(&lk); - /* - * We only have a new allocation when at the - * beginning of a new block, not when we are - * expanding into an existing block. - */ - if (blkoff(fs, diroffset) == 0) - return (1); - return (0); - } - /* - * We only have a new allocation when at the beginning - * of a new fragment, not when we are expanding into an - * existing fragment. Also, there is nothing to do if we - * are already tracking this block. - */ - if (fragoff(fs, diroffset) != 0) { - FREE_LOCK(&lk); - return (0); - } if ((pagedep->pd_state & NEWBLOCK) != 0) { WORKITEM_FREE(newdirblk, D_NEWDIRBLK); FREE_LOCK(&lk); return (0); } - /* - * Find our associated allocdirect and have it track us. 
- */ - if (inodedep_lookup(mp, dp->i_number, 0, &inodedep) == 0) - panic("softdep_setup_directory_add: lost inodedep"); - adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst); - if (adp == NULL || adp->ad_lbn != lbn) + if (newblk_lookup(mp, bp->b_blkno, 0, &newblk) == 0) panic("softdep_setup_directory_add: lost entry"); + WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list); pagedep->pd_state |= NEWBLOCK; + pagedep->pd_newdirblk = newdirblk; newdirblk->db_pagedep = pagedep; - WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list); } FREE_LOCK(&lk); return (0); @@ -3156,12 +5564,14 @@ softdep_change_directoryentry_offset(dp, base, old ACQUIRE_LOCK(&lk); lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); - if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0) + if (pagedep_lookup(UFSTOVFS(dp->i_ump), + dp->i_number, lbn, 0, &pagedep) == 0) goto done; oldoffset = offset + (oldloc - base); newoffset = offset + (newloc - base); - LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], da_pdlist) { + LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], + da_pdlist) { if (dap->da_offset != oldoffset) continue; dap->da_offset = newoffset; @@ -3184,48 +5594,139 @@ softdep_change_directoryentry_offset(dp, base, old done: bcopy(oldloc, newloc, entrysize); FREE_LOCK(&lk); + /* XXX Make a remove and add record, add to the pagedep. */ } /* + * Move the mkdir dependencies and journal work from one diradd to another + * when renaming a directory. The new name must depend on the mkdir deps + * completing as the old name did. Directories can only have one valid link + * at a time so one must be canonical. 
+ */ +static void +merge_diradd(inodedep, newdap) + struct inodedep *inodedep; + struct diradd *newdap; +{ + struct diradd *olddap; + struct mkdir *mkdir, *nextmd; + short state; + + olddap = inodedep->id_mkdiradd; + inodedep->id_mkdiradd = newdap; + if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { + newdap->da_state &= ~DEPCOMPLETE; + for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) { + nextmd = LIST_NEXT(mkdir, md_mkdirs); + if (mkdir->md_diradd != olddap) + continue; + mkdir->md_diradd = newdap; + state = mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY); + newdap->da_state |= state; + olddap->da_state &= ~state; + if ((olddap->da_state & + (MKDIR_PARENT | MKDIR_BODY)) == 0) + break; + } + if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) + panic("merge_diradd: unfound ref"); + } + /* + * Any mkdir related journal items are not safe to be freed until + * the new name is stable. + */ + jwork_move(D_DIRADD, __LINE__, &newdap->da_jwork, &olddap->da_jwork); + olddap->da_state |= DEPCOMPLETE; + complete_diradd(olddap); +} + +/* + * Move the diradd to the pending list when all diradd dependencies are + * complete. + */ +static void +complete_diradd(dap) + struct diradd *dap; +{ + struct pagedep *pagedep; + + if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { + if (dap->da_state & DIRCHG) + pagedep = dap->da_previous->dm_pagedep; + else + pagedep = dap->da_pagedep; + LIST_REMOVE(dap, da_pdlist); + LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); + } +} + +/* * Free a diradd dependency structure. This routine must be called - * with splbio interrupts blocked. + * with splbio interrupts blocked. If wkhd is NULL we should not find + * any pending jaddrefs and only jsegdeps to be retired. 
*/ static void -free_diradd(dap) +free_diradd(dap, wkhd) struct diradd *dap; + struct workhead *wkhd; { struct dirrem *dirrem; struct pagedep *pagedep; struct inodedep *inodedep; struct mkdir *mkdir, *nextmd; + struct jaddref *jaddref; mtx_assert(&lk, MA_OWNED); - WORKLIST_REMOVE(&dap->da_list); LIST_REMOVE(dap, da_pdlist); + if (dap->da_state & ONWORKLIST) + WORKLIST_REMOVE(&dap->da_list); if ((dap->da_state & DIRCHG) == 0) { pagedep = dap->da_pagedep; } else { dirrem = dap->da_previous; pagedep = dirrem->dm_pagedep; dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + dirrem->dm_state |= COMPLETE; + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list); } if (inodedep_lookup(pagedep->pd_list.wk_mp, dap->da_newinum, - 0, &inodedep) != 0) + 0, &inodedep) != 0) { + /* Abort the addref that reference this diradd. */ + LIST_FOREACH(jaddref, &inodedep->id_jaddrefhd, ja_inodeps) + if (jaddref->ja_diradd == dap) { + cancel_jaddref(jaddref, inodedep, wkhd); + break; + } + if (inodedep->id_mkdiradd == dap) + inodedep->id_mkdiradd = NULL; (void) free_inodedep(inodedep); + } if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) { nextmd = LIST_NEXT(mkdir, md_mkdirs); if (mkdir->md_diradd != dap) continue; - dap->da_state &= ~mkdir->md_state; - WORKLIST_REMOVE(&mkdir->md_list); + dap->da_state &= + ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)); LIST_REMOVE(mkdir, md_mkdirs); + if (mkdir->md_state & ONWORKLIST) + WORKLIST_REMOVE(&mkdir->md_list); + if ((jaddref = mkdir->md_jaddref) != NULL) + cancel_jaddref(jaddref, NULL, wkhd); WORKITEM_FREE(mkdir, D_MKDIR); + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) + break; } if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) panic("free_diradd: unfound ref"); } + if (wkhd) + jwork_move(D_DIRREM, __LINE__, wkhd, &dap->da_jwork); + /* + * Free any journal segments waiting for the directory write. 
+ */ + handle_jwork(&dap->da_jwork); WORKITEM_FREE(dap, D_DIRADD); } @@ -3254,11 +5755,14 @@ softdep_setup_remove(bp, dp, ip, isrmdir) int isrmdir; /* indicates if doing RMDIR */ { struct dirrem *dirrem, *prevdirrem; + int direct; /* - * Allocate a new dirrem if appropriate and ACQUIRE_LOCK. + * Allocate a new dirrem if appropriate and ACQUIRE_LOCK. We want + * newdirrem() to setup the full directory remove which requires + * isrmdir > 1. */ - dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem); + dirrem = newdirrem(bp, dp, ip, isrmdir?2:0, &prevdirrem); /* * If the COMPLETE flag is clear, then there were no active @@ -3280,12 +5784,64 @@ softdep_setup_remove(bp, dp, ip, isrmdir) LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, prevdirrem, dm_next); dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; + direct = LIST_EMPTY(&dirrem->dm_jremrefhd); FREE_LOCK(&lk); - handle_workitem_remove(dirrem, NULL); + if (direct) + handle_workitem_remove(dirrem, NULL); } } /* + * Check for an entry matching 'offset' on both the pd_dirraddhd list and the + * pd_pendinghd list of a pagedep. + */ +static struct diradd * +diradd_lookup(pagedep, offset) + struct pagedep *pagedep; + int offset; +{ + struct diradd *dap; + + LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist) + if (dap->da_offset == offset) + return (dap); + LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) + if (dap->da_offset == offset) + return (dap); + return (NULL); +} + +/* + * Search for a .. diradd dependency in a directory that is being removed. + * If the directory was renamed to a new parent we have a diradd rather + * than a mkdir for the .. entry. We need to cancel it now before + * it is found in truncate(). 
+ */ +static void +cancel_diradd_dotdot(ip, dirrem) + struct inode *ip; + struct dirrem *dirrem; +{ + struct pagedep *pagedep; + struct diradd *dap; + struct worklist *wk; + + if (pagedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, 0, + &pagedep) == 0) + return; + dap = diradd_lookup(pagedep, DOTDOT_OFFSET); + if (dap == NULL) + return; + free_diradd(dap, &dirrem->dm_jwork); + /* + * Mark any journal work as belonging to the parent so it is freed + * with the .. reference. + */ + LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list) + wk->wk_state |= MKDIR_PARENT; +} + +/* * Allocate a new dirrem if appropriate and return it along with * its associated pagedep. Called without a lock, returns with lock. */ @@ -3303,6 +5859,9 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) struct diradd *dap; struct dirrem *dirrem; struct pagedep *pagedep; + struct jremref *jremref; + struct jremref *dotremref; + struct jremref *dotdotremref; /* * Whiteouts have no deletion dependencies. @@ -3322,33 +5881,67 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) dirrem = malloc(sizeof(struct dirrem), M_DIRREM, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&dirrem->dm_list, D_DIRREM, ITOV(dp)->v_mount); + LIST_INIT(&dirrem->dm_jremrefhd); + LIST_INIT(&dirrem->dm_jwork); dirrem->dm_state = isrmdir ? RMDIR : 0; dirrem->dm_oldinum = ip->i_number; *prevdirremp = NULL; - + /* + * Allocate remove reference structures to track journal write + * dependencies. We will always have one for the link and + * when doing directories we will always have one more for dot. + * When renaming a directory we skip the dotdot link change so + * this is not needed. 
+ */ + dotremref = dotdotremref = NULL; + jremref = newjremref(dirrem, dp, ip, dp->i_offset); + if (isrmdir) + dotremref = newjremref(dirrem, ip, ip, DOT_OFFSET); + if (isrmdir > 1) { + dotdotremref = newjremref(dirrem, ip, dp, DOTDOT_OFFSET); + dotdotremref->jr_state |= MKDIR_PARENT; + } ACQUIRE_LOCK(&lk); lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); - if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) + if (pagedep_lookup(UFSTOVFS(dp->i_ump), dp->i_number, lbn, DEPALLOC, + &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); dirrem->dm_pagedep = pagedep; /* + * If we're removing a .. link search for the dependency now and + * cancel it. Any pending journal work will be added to the dirrem + * to be completed when the workitem remove completes. + */ + if (isrmdir > 1) + cancel_diradd_dotdot(ip, dirrem); + /* * Check for a diradd dependency for the same directory entry. * If present, then both dependencies become obsolete and can - * be de-allocated. Check for an entry on both the pd_dirraddhd - * list and the pd_pendinghd list. + * be de-allocated. */ - - LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist) - if (dap->da_offset == offset) - break; + dap = diradd_lookup(pagedep, offset); if (dap == NULL) { - - LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) - if (dap->da_offset == offset) - break; - if (dap == NULL) - return (dirrem); + /* + * Link the jremref structures into the dirrem so they are + * written prior to the pagedep. + */ + if (jremref) { + LIST_INSERT_HEAD(&dirrem->dm_jremrefhd, jremref, + jr_deps); + add_to_journal(&jremref->jr_list); + if (dotremref) { + LIST_INSERT_HEAD(&dirrem->dm_jremrefhd, + dotremref, jr_deps); + add_to_journal(&dotremref->jr_list); + } + if (dotdotremref) { + LIST_INSERT_HEAD(&dirrem->dm_jremrefhd, + dotdotremref, jr_deps); + add_to_journal(&dotdotremref->jr_list); + } + } + return (dirrem); } /* * Must be ATTACHED at this point. 
@@ -3359,6 +5952,20 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) panic("newdirrem: inum %d should be %d", ip->i_number, dap->da_newinum); /* + * If we've found a diradd in memory it still has valid journal + * entries to complete. Rather than writing new journal entries + * we complete the segdeps only after the removal is complete. The + * recovery operation will simply find an incomplete add. If the + * addref journal is not written the jaddrefs will stay linked into + * the inodedep and bmsafemap preventing the writes of the new + * data until they are ultimately freed. + */ + WORKITEM_FREE(jremref, D_JREMREF); + if (dotremref) + WORKITEM_FREE(dotremref, D_JREMREF); + if (dotdotremref) + WORKITEM_FREE(dotdotremref, D_JREMREF); + /* * If we are deleting a changed name that never made it to disk, * then return the dirrem describing the previous inode (which * represents the inode currently referenced from this entry on disk). @@ -3373,7 +5980,18 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) * Mark it COMPLETE so we can delete its inode immediately. */ dirrem->dm_state |= COMPLETE; - free_diradd(dap); + free_diradd(dap, &dirrem->dm_jwork); +#ifdef DEBUG + /* XXX Temporary. 
*/ + if (isrmdir == 0) { + struct worklist *wk; + + LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list) + if (wk->wk_state & (MKDIR_BODY | MKDIR_PARENT)) + panic("bad wk %p (0x%X)\n", wk, wk->wk_state); + } +#endif + return (dirrem); } @@ -3407,6 +6025,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum struct dirrem *dirrem, *prevdirrem; struct pagedep *pagedep; struct inodedep *inodedep; + struct jaddref *jaddref; struct mount *mp; offset = blkoff(dp->i_fs, dp->i_offset); @@ -3422,6 +6041,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE; dap->da_offset = offset; dap->da_newinum = newinum; + LIST_INIT(&dap->da_jwork); } /* @@ -3454,7 +6074,8 @@ softdep_setup_directory_change(bp, dp, ip, newinum dm_next); } else { dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list); } FREE_LOCK(&lk); return; @@ -3483,23 +6104,30 @@ softdep_setup_directory_change(bp, dp, ip, newinum dap->da_pagedep = pagedep; } dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list); } /* - * Link into its inodedep. Put it on the id_bufwait list if the inode - * is not yet written. If it is written, do the post-inode write - * processing to put it on the id_pendinghd list. + * Lookup the jaddref for this journal entry. We must finish + * initializing it and make the diradd write dependent on it. 
*/ - if (inodedep_lookup(mp, newinum, DEPALLOC, &inodedep) == 0 || - (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { - dap->da_state |= COMPLETE; - LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); - WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); - } else { - LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], - dap, da_pdlist); - WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); - } + if (inodedep_lookup(mp, newinum, 0, &inodedep) == 0) + panic("softdep_setup_directory_change: Lost inodedep."); + jaddref = LIST_FIRST(&inodedep->id_jaddrefhd); + KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, + ("softdep_setup_directory_change: bad jaddref %p", jaddref)); + jaddref->ja_diroff = dp->i_offset; + jaddref->ja_diradd = dap; + LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, + da_pdlist); + add_to_journal(&jaddref->ja_list); + /* + * If we're making a new name for a directory that has not been + * committed when need to move the dot and dotdot references to + * this new name. + */ + if (inodedep->id_mkdiradd) + merge_diradd(inodedep, dap); FREE_LOCK(&lk); } @@ -3584,6 +6212,8 @@ handle_workitem_remove(dirrem, xp) { struct thread *td = curthread; struct inodedep *inodedep; + struct workhead dotdotwk; + struct worklist *wk; struct vnode *vp; struct inode *ip; ino_t oldinum; @@ -3600,7 +6230,27 @@ handle_workitem_remove(dirrem, xp) if ((inodedep_lookup(dirrem->dm_list.wk_mp, dirrem->dm_oldinum, 0, &inodedep)) == 0) panic("handle_workitem_remove: lost inodedep"); + KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd), + ("handle_workitem_remove: Journal entries not written.")); /* + * Move all dependencies waiting on the remove to complete + * from the dirrem to the inode inowait list to be completed + * after the inode has been updated and written to disk. Any + * marked MKDIR_PARENT are saved to be completed when the .. ref + * is removed. 
+ */ + LIST_INIT(&dotdotwk); + while ((wk = LIST_FIRST(&dirrem->dm_jwork)) != NULL) { + WORKLIST_REMOVE(wk); + if (wk->wk_state & MKDIR_PARENT) { + wk->wk_state &= ~MKDIR_PARENT; + WORKLIST_INSERT(&dotdotwk, wk); + continue; + } + WORKLIST_INSERT(&inodedep->id_inowait, wk); + } + LIST_SWAP(&dirrem->dm_jwork, &dotdotwk, worklist, wk_list); + /* * Normal file deletion. */ if ((dirrem->dm_state & RMDIR) == 0) { @@ -3611,6 +6261,9 @@ handle_workitem_remove(dirrem, xp) panic("handle_workitem_remove: bad file delta"); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; num_dirrem -= 1; + KASSERT(LIST_EMPTY(&dirrem->dm_jwork), + ("handle_workitem_remove: worklist not empty. %s", + TYPENAME(LIST_FIRST(&dirrem->dm_jwork)->wk_type))); WORKITEM_FREE(dirrem, D_DIRREM); FREE_LOCK(&lk); vput(vp); @@ -3639,6 +6292,8 @@ handle_workitem_remove(dirrem, xp) * directory should not change. Thus we skip the followup dirrem. */ if (dirrem->dm_state & DIRCHG) { + KASSERT(LIST_EMPTY(&dirrem->dm_jwork), + ("handle_workitem_remove: DIRCHG and worklist not empty.")); num_dirrem -= 1; WORKITEM_FREE(dirrem, D_DIRREM); FREE_LOCK(&lk); @@ -3689,6 +6344,7 @@ static void handle_workitem_freefile(freefile) struct freefile *freefile; { + struct workhead wkhd; struct fs *fs; struct inodedep *idp; struct ufsmount *ump; @@ -3701,13 +6357,15 @@ handle_workitem_freefile(freefile) error = inodedep_lookup(UFSTOVFS(ump), freefile->fx_oldinum, 0, &idp); FREE_LOCK(&lk); if (error) - panic("handle_workitem_freefile: inodedep survived"); + panic("handle_workitem_freefile: inodedep %p survived", idp); #endif UFS_LOCK(ump); fs->fs_pendinginodes -= 1; UFS_UNLOCK(ump); + LIST_INIT(&wkhd); + LIST_SWAP(&freefile->fx_jwork, &wkhd, worklist, wk_list); if ((error = ffs_freefile(ump, fs, freefile->fx_devvp, - freefile->fx_oldinum, freefile->fx_mode)) != 0) + freefile->fx_oldinum, freefile->fx_mode, &wkhd)) != 0) softdep_error("handle_workitem_freefile", error); ACQUIRE_LOCK(&lk); WORKITEM_FREE(freefile, D_FREEFILE); 
@@ -3757,8 +6415,9 @@ softdep_disk_io_initiation(bp) { struct worklist *wk; struct worklist marker; - struct indirdep *indirdep; struct inodedep *inodedep; + struct freeblks *freeblks; + struct jfreeblk *jfreeblk; /* * We only care about write operations. There should never @@ -3767,6 +6426,10 @@ softdep_disk_io_initiation(bp) if (bp->b_iocmd != BIO_WRITE) panic("softdep_disk_io_initiation: not write"); + if (bp->b_vflags & BV_BKGRDINPROG) + panic("softdep_disk_io_initiation: Writing buffer with " + "background write in progress: %p", bp); + marker.wk_type = D_LAST + 1; /* Not a normal workitem */ PHOLD(curproc); /* Don't swap out kernel stack */ @@ -3792,46 +6455,40 @@ softdep_disk_io_initiation(bp) continue; case D_INDIRDEP: - indirdep = WK_INDIRDEP(wk); - if (indirdep->ir_state & GOINGAWAY) - panic("disk_io_initiation: indirdep gone"); + initiate_write_indirdep(WK_INDIRDEP(wk), bp); + continue; + + case D_BMSAFEMAP: + initiate_write_bmsafemap(WK_BMSAFEMAP(wk), bp); + continue; + + case D_JSEG: + WK_JSEG(wk)->js_buf = NULL; + continue; + + case D_FREEBLKS: + freeblks = WK_FREEBLKS(wk); + jfreeblk = LIST_FIRST(&freeblks->fb_jfreeblkhd); /* - * If there are no remaining dependencies, this - * will be writing the real pointers, so the - * dependency can be freed. + * We have to wait for the jfreeblks to be journaled + * before we can write an inodeblock with updated + * pointers. Be careful to arrange the marker so + * we revisit the jfreeblk if it's not removed by + * the first jwait(). 
*/ - if (LIST_EMPTY(&indirdep->ir_deplisthd)) { - struct buf *bp; - - bp = indirdep->ir_savebp; - bp->b_flags |= B_INVAL | B_NOCACHE; - /* inline expand WORKLIST_REMOVE(wk); */ - wk->wk_state &= ~ONWORKLIST; - LIST_REMOVE(wk, wk_list); - WORKITEM_FREE(indirdep, D_INDIRDEP); - FREE_LOCK(&lk); - brelse(bp); - ACQUIRE_LOCK(&lk); - continue; + if (jfreeblk != NULL) { + LIST_REMOVE(&marker, wk_list); + LIST_INSERT_BEFORE(wk, &marker, wk_list); + jwait(&jfreeblk->jf_list); } - /* - * Replace up-to-date version with safe version. - */ - FREE_LOCK(&lk); - indirdep->ir_saveddata = malloc(bp->b_bcount, - M_INDIRDEP, M_SOFTDEP_FLAGS); - ACQUIRE_LOCK(&lk); - indirdep->ir_state &= ~ATTACHED; - indirdep->ir_state |= UNDONE; - bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); - bcopy(indirdep->ir_savebp->b_data, bp->b_data, - bp->b_bcount); continue; case D_MKDIR: - case D_BMSAFEMAP: case D_ALLOCDIRECT: case D_ALLOCINDIR: + case D_FREEWORK: + case D_FREEDEP: + case D_JSEGDEP: continue; default: @@ -3855,6 +6512,8 @@ initiate_write_filepage(pagedep, bp) struct pagedep *pagedep; struct buf *bp; { + struct jremref *jremref; + struct dirrem *dirrem; struct diradd *dap; struct direct *ep; int i; @@ -3869,6 +6528,16 @@ initiate_write_filepage(pagedep, bp) return; } pagedep->pd_state |= IOSTARTED; + /* + * Wait for all journal remove dependencies to hit the disk. + * We can not allow any potentially conflicting directory adds + * to be visible before removes and rollback is too difficult. + * lk may be dropped and re-acquired, however we hold the buf + * locked so the dependency can not go away. 
+ */ + LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) + while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) + jwait(&jremref->jr_list); for (i = 0; i < DAHASHSZ; i++) { LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { ep = (struct direct *) @@ -3905,6 +6574,7 @@ initiate_write_inodeblock_ufs1(inodedep, bp) struct allocdirect *adp, *lastadp; struct ufs1_dinode *dp; struct ufs1_dinode *sip; + struct jaddref *jaddref; struct fs *fs; ufs_lbn_t i; #ifdef INVARIANTS @@ -3940,32 +6610,41 @@ initiate_write_inodeblock_ufs1(inodedep, bp) */ inodedep->id_savedsize = dp->di_size; inodedep->id_savedextsize = 0; - if (TAILQ_EMPTY(&inodedep->id_inoupdt)) + if (TAILQ_EMPTY(&inodedep->id_inoupdt) && + LIST_EMPTY(&inodedep->id_jaddrefhd)) return; /* + * Revert the link count for every jaddref present. + */ + LIST_FOREACH(jaddref, &inodedep->id_jaddrefhd, ja_inodeps) { + dp->di_nlink--; + jaddref->ja_state &= ~ATTACHED; + jaddref->ja_state |= UNDONE; + } + /* * Set the dependencies to busy. 
*/ for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS - if (deplist != 0 && prevlbn >= adp->ad_lbn) + if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); - prevlbn = adp->ad_lbn; - if (adp->ad_lbn < NDADDR && - dp->di_db[adp->ad_lbn] != adp->ad_newblkno) + prevlbn = adp->ad_offset; + if (adp->ad_offset < NDADDR && + dp->di_db[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %d != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - dp->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, + dp->di_db[adp->ad_offset], (intmax_t)adp->ad_newblkno); - if (adp->ad_lbn >= NDADDR && - dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) + if (adp->ad_offset >= NDADDR && + dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno) panic("%s: indirect pointer #%jd mismatch %d != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn - NDADDR, - dp->di_ib[adp->ad_lbn - NDADDR], + (intmax_t)adp->ad_offset - NDADDR, + dp->di_ib[adp->ad_offset - NDADDR], (intmax_t)adp->ad_newblkno); - deplist |= 1 << adp->ad_lbn; + deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); @@ -3981,14 +6660,14 @@ initiate_write_inodeblock_ufs1(inodedep, bp) */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_lbn >= NDADDR) + if (adp->ad_offset >= NDADDR) break; - dp->di_db[adp->ad_lbn] = adp->ad_oldblkno; + dp->di_db[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; - dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NDADDR; i++) { + dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NDADDR; i++) { #ifdef 
INVARIANTS if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep1"); @@ -4012,8 +6691,8 @@ initiate_write_inodeblock_ufs1(inodedep, bp) * we already checked for fragments in the loop above. */ if (lastadp != NULL && - dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) + dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_db[i] != 0) break; dp->di_size = (i + 1) * fs->fs_bsize; @@ -4030,7 +6709,7 @@ initiate_write_inodeblock_ufs1(inodedep, bp) * postpone fsck, we are stuck with this argument. */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) - dp->di_ib[adp->ad_lbn - NDADDR] = 0; + dp->di_ib[adp->ad_offset - NDADDR] = 0; } /* @@ -4051,6 +6730,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) struct allocdirect *adp, *lastadp; struct ufs2_dinode *dp; struct ufs2_dinode *sip; + struct jaddref *jaddref; struct fs *fs; ufs_lbn_t i; #ifdef INVARIANTS @@ -4087,24 +6767,33 @@ initiate_write_inodeblock_ufs2(inodedep, bp) inodedep->id_savedsize = dp->di_size; inodedep->id_savedextsize = dp->di_extsize; if (TAILQ_EMPTY(&inodedep->id_inoupdt) && - TAILQ_EMPTY(&inodedep->id_extupdt)) + TAILQ_EMPTY(&inodedep->id_extupdt) && + LIST_EMPTY(&inodedep->id_jaddrefhd)) return; /* + * Revert the link count for every jaddref present. + */ + LIST_FOREACH(jaddref, &inodedep->id_jaddrefhd, ja_inodeps) { + dp->di_nlink--; + jaddref->ja_state &= ~ATTACHED; + jaddref->ja_state |= UNDONE; + } + /* * Set the ext data dependencies to busy. 
*/ for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS - if (deplist != 0 && prevlbn >= adp->ad_lbn) + if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); - prevlbn = adp->ad_lbn; - if (dp->di_extb[adp->ad_lbn] != adp->ad_newblkno) + prevlbn = adp->ad_offset; + if (dp->di_extb[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - (intmax_t)dp->di_extb[adp->ad_lbn], + (intmax_t)adp->ad_offset, + (intmax_t)dp->di_extb[adp->ad_offset], (intmax_t)adp->ad_newblkno); - deplist |= 1 << adp->ad_lbn; + deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); @@ -4120,12 +6809,12 @@ initiate_write_inodeblock_ufs2(inodedep, bp) */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno; + dp->di_extb[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; - dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NXADDR; i++) { + dp->di_extsize = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NXADDR; i++) { #ifdef INVARIANTS if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep1"); @@ -4142,8 +6831,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * we already checked for fragments in the loop above. 
*/ if (lastadp != NULL && - dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) + dp->di_extsize <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_extb[i] != 0) break; dp->di_extsize = (i + 1) * fs->fs_bsize; @@ -4154,24 +6843,24 @@ initiate_write_inodeblock_ufs2(inodedep, bp) for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS - if (deplist != 0 && prevlbn >= adp->ad_lbn) + if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); - prevlbn = adp->ad_lbn; - if (adp->ad_lbn < NDADDR && - dp->di_db[adp->ad_lbn] != adp->ad_newblkno) + prevlbn = adp->ad_offset; + if (adp->ad_offset < NDADDR && + dp->di_db[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - (intmax_t)dp->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, + (intmax_t)dp->di_db[adp->ad_offset], (intmax_t)adp->ad_newblkno); - if (adp->ad_lbn >= NDADDR && - dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) + if (adp->ad_offset >= NDADDR && + dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno) panic("%s indirect pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock:", - (intmax_t)adp->ad_lbn - NDADDR, - (intmax_t)dp->di_ib[adp->ad_lbn - NDADDR], + (intmax_t)adp->ad_offset - NDADDR, + (intmax_t)dp->di_ib[adp->ad_offset - NDADDR], (intmax_t)adp->ad_newblkno); - deplist |= 1 << adp->ad_lbn; + deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); @@ -4187,14 +6876,14 @@ initiate_write_inodeblock_ufs2(inodedep, bp) */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_lbn >= NDADDR) + if (adp->ad_offset >= NDADDR) break; - dp->di_db[adp->ad_lbn] = 
adp->ad_oldblkno; + dp->di_db[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; - dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NDADDR; i++) { + dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NDADDR; i++) { #ifdef INVARIANTS if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep2"); @@ -4218,8 +6907,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * we already checked for fragments in the loop above. */ if (lastadp != NULL && - dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) + dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_db[i] != 0) break; dp->di_size = (i + 1) * fs->fs_bsize; @@ -4236,16 +6925,285 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * postpone fsck, we are stuck with this argument. */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) - dp->di_ib[adp->ad_lbn - NDADDR] = 0; + dp->di_ib[adp->ad_offset - NDADDR] = 0; } +static void +free_indirdep(indirdep) + struct indirdep *indirdep; +{ + + KASSERT(LIST_EMPTY(&indirdep->ir_jwork), + ("free_indirdep: Journal work not empty.")); + if (indirdep->ir_state & ONWORKLIST) + WORKLIST_REMOVE(&indirdep->ir_list); + WORKITEM_FREE(indirdep, D_INDIRDEP); +} + +static void +initiate_write_indirdep(indirdep, bp) + struct indirdep *indirdep; + struct buf *bp; +{ + + if (indirdep->ir_state & GOINGAWAY) + panic("disk_io_initiation: indirdep gone"); + + /* + * If there are no remaining dependencies, this will be writing + * the real pointers. + */ + if (LIST_EMPTY(&indirdep->ir_deplisthd)) + return; + /* + * Replace up-to-date version with safe version. 
+ */ + FREE_LOCK(&lk); + indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, + M_SOFTDEP_FLAGS); + ACQUIRE_LOCK(&lk); + indirdep->ir_state &= ~ATTACHED; + indirdep->ir_state |= UNDONE; + bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); + bcopy(indirdep->ir_savebp->b_data, bp->b_data, + bp->b_bcount); +} + /* + * Called when an inode has been cleared in a cg bitmap. This finally + * eliminates any canceled jaddrefs + */ +void +softdep_setup_inofree(mp, bp, ino, wkhd) + struct mount *mp; + struct buf *bp; + ino_t ino; + struct workhead *wkhd; +{ + struct worklist *wk, *wkn; + struct bmsafemap *bmsafemap; + struct inodedep *inodedep; + uint8_t *inosused; + struct cg *cgp; + struct fs *fs; + + ACQUIRE_LOCK(&lk); + fs = VFSTOUFS(mp)->um_fs; + bmsafemap = bmsafemap_lookup(mp, bp, ino_to_cg(fs, ino)); + cgp = (struct cg *)bp->b_data; + inosused = cg_inosused(cgp); + if (isset(inosused, ino % fs->fs_ipg)) + panic("softdep_setup_inofree: inode %d not freed.", ino); + if (inodedep_lookup(mp, ino, 0, &inodedep)) + panic("softdep_setup_inofree: ino %d has existing inodedep %p", + ino, inodedep); + if (wkhd) { /* XXX Temporary. */ + LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) { + if (wk->wk_type != D_JADDREF) + continue; + WORKLIST_REMOVE(wk); + /* + * We can free immediately even if the jaddref isn't attached + * in a background write as now the bitmaps are reconciled. + */ + wk->wk_state |= COMPLETE | ATTACHED; + free_jaddref(WK_JADDREF(wk)); + } + } + FREE_LOCK(&lk); +} + + +/* + * Called via ffs_blkfree() after a set of frags has been cleared from a cg + * map. Any dependencies waiting for the write to clear are added to the + * buf's list and any jnewblks that are being canceled are discarded + * immediately. 
+ */ +void +softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) + struct mount *mp; + struct buf *bp; + ufs2_daddr_t blkno; + int frags; + struct workhead *wkhd; +{ + struct bmsafemap *bmsafemap; + struct jnewblk *jnewblk; + struct worklist *wk, *wkn; + uint8_t *blksfree; + struct cg *cgp; + struct fs *fs; + ufs2_daddr_t jstart; + ufs2_daddr_t jend; + ufs2_daddr_t end; + long bno; + int i; + + ACQUIRE_LOCK(&lk); + fs = VFSTOUFS(mp)->um_fs; + bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno)); + + /* + * Detach any jnewblks which have been canceled. They must linger + * until the bitmap is cleared again by ffs_blkfree() to prevent + * an unjournaled allocation from hitting the disk. + */ + if (wkhd) { /* XXX Should be temporary. */ + LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) { + if (wk->wk_type != D_JNEWBLK) + continue; + jnewblk = WK_JNEWBLK(wk); + KASSERT(jnewblk->jn_state & GOINGAWAY, + ("softdep_setup_blkfree: Freed jnewblk not going away.")); + WORKLIST_REMOVE(wk); + /* + * Assert that this block is free in the bitmap before we + * discard the jnewblk. + */ + cgp = (struct cg *)bp->b_data; + blksfree = cg_blksfree(cgp); + bno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) + if (isclr(blksfree, bno + i)) + panic("softdep_setup_blkfree: %jd - %jd(%jd) not free", + blkno, jnewblk->jn_blkno, jnewblk->jn_lbn); + /* + * Even if it's not attached we can free immediately as the + * new bitmap is correct. + */ + wk->wk_state |= COMPLETE | ATTACHED; + free_jnewblk(jnewblk); + } + /* + * The buf must be locked by the caller otherwise these could be + * added while it's being written and the write would complete + * them before they made it to disk. + */ + jwork_move(D_BMSAFEMAP, __LINE__, &bp->b_dep, wkhd); + } + /* + * Assert that we are not freeing a block which has an outstanding + * allocation dependency. 
+ */ + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + end = blkno + frags; + LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { + /* + * Don't match against blocks that will be freed when the + * background write is done. + */ + if ((jnewblk->jn_state & (ATTACHED | COMPLETE | DEPCOMPLETE)) == + (COMPLETE | DEPCOMPLETE)) + continue; + jstart = jnewblk->jn_blkno + jnewblk->jn_oldfrags; + jend = jnewblk->jn_blkno + jnewblk->jn_frags; + if ((blkno >= jstart && blkno < jend) || + (end > jstart && end <= jend)) { + printf("state 0x%X %jd - %d %d dep %p\n", + jnewblk->jn_state, jnewblk->jn_blkno, + jnewblk->jn_oldfrags, jnewblk->jn_frags, + jnewblk->jn_newblk); + panic("softdep_setup_blkfree: " + "%jd-%jd(%d) overlaps with %jd-%jd", + blkno, end, frags, jstart, jend); + } + } + FREE_LOCK(&lk); +} + +static void +initiate_write_bmsafemap(bmsafemap, bp) + struct bmsafemap *bmsafemap; + struct buf *bp; /* The cg block. */ +{ + struct jaddref *jaddref; + struct jnewblk *jnewblk; + uint8_t *inosused; + uint8_t *blksfree; + struct cg *cgp; + struct fs *fs; + int cleared; + ino_t ino; + long bno; + int i; + + if (bmsafemap->sm_state & IOSTARTED) + panic("initiate_write_bmsafemap: Already started\n"); + bmsafemap->sm_state |= IOSTARTED; + /* + * Clear any inode allocations which are pending journal writes. + */ + if (LIST_FIRST(&bmsafemap->sm_jaddrefhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + inosused = cg_inosused(cgp); + LIST_FOREACH(jaddref, &bmsafemap->sm_jaddrefhd, ja_bmdeps) { + ino = jaddref->ja_ino % fs->fs_ipg; + /* + * If this is a background copy the inode may not + * be marked used yet. 
+ */ + if (isset(inosused, ino)) { + if ((jaddref->ja_mode & IFMT) == IFDIR) + cgp->cg_cs.cs_ndir--; + cgp->cg_cs.cs_nifree++; + clrbit(inosused, ino); + jaddref->ja_state &= ~ATTACHED; + jaddref->ja_state |= UNDONE; + } else if ((bp->b_xflags & BX_BKGRDMARKER) == 0) + panic("initiate_write_bmsafemap: inode %d " + "marked free", jaddref->ja_ino); + } + } + /* + * Clear any block allocations which are pending journal writes. + */ + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + blksfree = cg_blksfree(cgp); + LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { + bno = dtogd(fs, jnewblk->jn_blkno); + cleared = 0; + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; + i++) { + if (isclr(blksfree, bno + i)) { + cleared = 1; + setbit(blksfree, bno + i); + } + } + /* + * We may not clear the block if it's a background + * copy. In that case there is no reason to detach + * it. + */ + if (cleared) { + jnewblk->jn_state &= ~ATTACHED; + jnewblk->jn_state |= UNDONE; + } else if ((bp->b_xflags & BX_BKGRDMARKER) == 0) + panic("initiate_write_bmsafemap: block %jd " + "marked free", jnewblk->jn_blkno); + } + } + /* + * Move allocation lists to the written lists so they can be + * cleared once the block write is complete. + */ + LIST_SWAP(&bmsafemap->sm_inodedephd, &bmsafemap->sm_inodedepwr, + inodedep, id_deps); + LIST_SWAP(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr, + newblk, nb_deps); +} + +/* * This routine is called during the completion interrupt * service routine for a disk write (from the procedure called * by the device driver to inform the filesystem caches of * a request completion). It should be called early in this * procedure, before the block is made available to other * processes or other routines are called. 
+ * */ static void softdep_disk_write_complete(bp) @@ -4254,12 +7212,7 @@ softdep_disk_write_complete(bp) struct worklist *wk; struct worklist *owk; struct workhead reattach; - struct newblk *newblk; - struct allocindir *aip; - struct allocdirect *adp; - struct indirdep *indirdep; - struct inodedep *inodedep; - struct bmsafemap *bmsafemap; + struct buf *sbp; /* * If an error occurred while doing the write, then the data @@ -4271,8 +7224,9 @@ softdep_disk_write_complete(bp) /* * This lock must not be released anywhere in this code segment. */ + sbp = NULL; + owk = NULL; ACQUIRE_LOCK(&lk); - owk = NULL; while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { WORKLIST_REMOVE(wk); if (wk == owk) @@ -4291,33 +7245,8 @@ softdep_disk_write_complete(bp) continue; case D_BMSAFEMAP: - bmsafemap = WK_BMSAFEMAP(wk); - while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd))) { - newblk->nb_state |= DEPCOMPLETE; - newblk->nb_bmsafemap = NULL; - LIST_REMOVE(newblk, nb_deps); - } - while ((adp = - LIST_FIRST(&bmsafemap->sm_allocdirecthd))) { - adp->ad_state |= DEPCOMPLETE; - adp->ad_buf = NULL; - LIST_REMOVE(adp, ad_deps); - handle_allocdirect_partdone(adp); - } - while ((aip = - LIST_FIRST(&bmsafemap->sm_allocindirhd))) { - aip->ai_state |= DEPCOMPLETE; - aip->ai_buf = NULL; - LIST_REMOVE(aip, ai_deps); - handle_allocindir_partdone(aip); - } - while ((inodedep = - LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) { - inodedep->id_state |= DEPCOMPLETE; - LIST_REMOVE(inodedep, id_deps); - inodedep->id_buf = NULL; - } - WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp)) + WORKLIST_INSERT(&reattach, wk); continue; case D_MKDIR: @@ -4325,37 +7254,42 @@ softdep_disk_write_complete(bp) continue; case D_ALLOCDIRECT: - adp = WK_ALLOCDIRECT(wk); - adp->ad_state |= COMPLETE; - handle_allocdirect_partdone(adp); + wk->wk_state |= COMPLETE; + handle_allocdirect_partdone(WK_ALLOCDIRECT(wk), NULL); continue; case D_ALLOCINDIR: - aip = WK_ALLOCINDIR(wk); - 
aip->ai_state |= COMPLETE; - handle_allocindir_partdone(aip); + wk->wk_state |= COMPLETE; + handle_allocindir_partdone(WK_ALLOCINDIR(wk)); continue; case D_INDIRDEP: - indirdep = WK_INDIRDEP(wk); - if (indirdep->ir_state & GOINGAWAY) - panic("disk_write_complete: indirdep gone"); - bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount); - free(indirdep->ir_saveddata, M_INDIRDEP); - indirdep->ir_saveddata = 0; - indirdep->ir_state &= ~UNDONE; - indirdep->ir_state |= ATTACHED; - while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { - handle_allocindir_partdone(aip); - if (aip == LIST_FIRST(&indirdep->ir_donehd)) - panic("disk_write_complete: not gone"); - } - WORKLIST_INSERT(&reattach, wk); - if ((bp->b_flags & B_DELWRI) == 0) - stat_indir_blk_ptrs++; - bdirty(bp); + if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp)) + WORKLIST_INSERT(&reattach, wk); continue; + case D_FREEBLKS: + wk->wk_state |= COMPLETE; + if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(wk); + continue; + + case D_FREEWORK: + handle_written_freework(WK_FREEWORK(wk)); + break; + + case D_FREEDEP: + free_freedep(WK_FREEDEP(wk)); + continue; + + case D_JSEGDEP: + free_jsegdep(WK_JSEGDEP(wk)); + continue; + + case D_JSEG: + handle_written_jseg(WK_JSEG(wk), bp); + continue; + default: panic("handle_disk_write_complete: Unknown type %s", TYPENAME(wk->wk_type)); @@ -4370,6 +7304,8 @@ softdep_disk_write_complete(bp) WORKLIST_INSERT(&bp->b_dep, wk); } FREE_LOCK(&lk); + if (sbp) + brelse(sbp); } /* @@ -4378,18 +7314,17 @@ softdep_disk_write_complete(bp) * splbio interrupts blocked. */ static void -handle_allocdirect_partdone(adp) +handle_allocdirect_partdone(adp, wkhd) struct allocdirect *adp; /* the completed allocdirect */ + struct workhead *wkhd; /* Work to do when inode is writtne. 
*/ { struct allocdirectlst *listhead; struct allocdirect *listadp; struct inodedep *inodedep; - long bsize, delay; + long bsize; if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; - if (adp->ad_buf != NULL) - panic("handle_allocdirect_partdone: dangling dep"); /* * The on-disk inode cannot claim to be any larger than the last * fragment that has been written. Otherwise, the on-disk inode @@ -4441,23 +7376,27 @@ static void /* * If we have found the just finished dependency, then free * it along with anything that follows it that is complete. - * If the inode still has a bitmap dependency, then it has - * never been written to disk, hence the on-disk inode cannot - * reference the old fragment so we can free it without delay. + * If the inode is not ALLCOMPLETE the pointer may not yet + * be written. Place the allocdirect on the bufwait to + * be freed when we're sure it is reachable on disk. */ - delay = (inodedep->id_state & DEPCOMPLETE); + if ((inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE) + wkhd = &inodedep->id_bufwait; for (; adp; adp = listadp) { listadp = TAILQ_NEXT(adp, ad_next); if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; - free_allocdirect(listhead, adp, delay); + TAILQ_REMOVE(listhead, adp, ad_next); + if (wkhd) + WORKLIST_INSERT(wkhd, &adp->ad_block.nb_list); + else + free_newblk(&adp->ad_block); } } /* - * Called from within softdep_disk_write_complete above. Note that - * this routine is always called from interrupt level with further - * splbio interrupts blocked. + * Called from within softdep_disk_write_complete above. This routine + * completes successfully written allocindirs. 
*/ static void handle_allocindir_partdone(aip) @@ -4467,11 +7406,9 @@ handle_allocindir_partdone(aip) if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE) return; - if (aip->ai_buf != NULL) - panic("handle_allocindir_partdone: dangling dependency"); indirdep = aip->ai_indirdep; + LIST_REMOVE(aip, ai_next); if (indirdep->ir_state & UNDONE) { - LIST_REMOVE(aip, ai_next); LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next); return; } @@ -4481,12 +7418,120 @@ handle_allocindir_partdone(aip) else ((ufs2_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] = aip->ai_newblkno; - LIST_REMOVE(aip, ai_next); - if (aip->ai_freefrag != NULL) - add_to_worklist(&aip->ai_freefrag->ff_list); - WORKITEM_FREE(aip, D_ALLOCINDIR); + /* + * Await the pointer write before freeing the allocindir. + */ + LIST_INSERT_HEAD(&indirdep->ir_writehd, aip, ai_next); } +static void +handle_jwork(wkhd) + struct workhead *wkhd; +{ + struct worklist *wk; + + while ((wk = LIST_FIRST(wkhd)) != NULL) { + WORKLIST_REMOVE(wk); + switch (wk->wk_type) { + case D_JSEGDEP: + free_jsegdep(WK_JSEGDEP(wk)); + continue; + default: + panic("handle_jwork: Unknown type %s\n", + TYPENAME(wk->wk_type)); + } + } +} + +static struct freefile * +handle_bufwait(inodedep, refhd) + struct inodedep *inodedep; + struct workhead *refhd; +{ + struct jaddref *jaddref; + struct freefile *freefile; + struct worklist *wk; + + freefile = NULL; + while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { + WORKLIST_REMOVE(wk); + switch (wk->wk_type) { + case D_FREEFILE: + /* + * We defer adding freefile to the worklist + * until all other additions have been made to + * ensure that it will be done after all the + * old blocks have been freed. 
+ */ + if (freefile != NULL) + panic("handle_bufwait: freefile"); + freefile = WK_FREEFILE(wk); + continue; + + case D_MKDIR: + handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT); + continue; + + case D_DIRADD: + diradd_inode_written(WK_DIRADD(wk), inodedep); + continue; + + case D_FREEFRAG: + wk->wk_state |= COMPLETE; + if ((wk->wk_state & ALLCOMPLETE) != ALLCOMPLETE) + continue; + add_to_worklist(wk); + continue; + + case D_DIRREM: + wk->wk_state |= COMPLETE; + add_to_worklist(wk); + continue; + + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + free_newblk(WK_NEWBLK(wk)); + continue; + + case D_JSEGDEP: + free_jsegdep(WK_JSEGDEP(wk)); + continue; + + case D_JADDREF: + jaddref = WK_JADDREF(wk); + /* + * We have to remove this journal entry from the + * inode's list as soon as it's written so the + * inodedep can be freed. + */ + if (jaddref->ja_state & ONDEPLIST) { + jaddref->ja_state &= ~ONDEPLIST; + LIST_REMOVE(jaddref, ja_inodeps); + } + /* + * Transfer any jaddrefs to the list to be freed with + * the bitmap if we're handling a removed file. + */ + if (refhd == NULL) { + wk->wk_state |= COMPLETE; + free_jaddref(jaddref); + } else + WORKLIST_INSERT(refhd, wk); + continue; + + case D_JNEWBLK: + wk->wk_state |= COMPLETE; + free_jnewblk(WK_JNEWBLK(wk)); + continue; + + default: + panic("handle_bufwait: Unknown type %p(%s)", + wk, TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } + return (freefile); +} /* * Called from within softdep_disk_write_complete above to restore * in-memory inode block contents to their most up-to-date state. 
Note @@ -4498,12 +7543,16 @@ handle_written_inodeblock(inodedep, bp) struct inodedep *inodedep; struct buf *bp; /* buffer containing the inode block */ { - struct worklist *wk, *filefree; + struct freefile *freefile; struct allocdirect *adp, *nextadp; struct ufs1_dinode *dp1 = NULL; struct ufs2_dinode *dp2 = NULL; + struct jaddref *jaddref, *tmp; + struct workhead wkhd; int hadchanges, fstype; + LIST_INIT(&wkhd); + hadchanges = 0; if ((inodedep->id_state & IOSTARTED) == 0) panic("handle_written_inodeblock: not started"); inodedep->id_state &= ~IOSTARTED; @@ -4524,6 +7573,7 @@ handle_written_inodeblock(inodedep, bp) * corresponding updates written to disk. */ if (inodedep->id_savedino1 != NULL) { + hadchanges = 1; if (fstype == UFS1) *dp1 = *inodedep->id_savedino1; else @@ -4533,6 +7583,17 @@ handle_written_inodeblock(inodedep, bp) if ((bp->b_flags & B_DELWRI) == 0) stat_inode_bitmap++; bdirty(bp); + /* + * If the inode is clear here and GOINGAWAY it will never + * be written. Process the bufwait to find the freefile + * and add it to the worklist. The state is altered so + * free_inodedep() will succeed. The DEPCOMPLETE is + * otherwise never visible. + */ + if (inodedep->id_state & GOINGAWAY) { + inodedep->id_state |= COMPLETE | DEPCOMPLETE; + goto bufwait; + } return (1); } inodedep->id_state |= COMPLETE; @@ -4540,50 +7601,64 @@ handle_written_inodeblock(inodedep, bp) * Roll forward anything that had to be rolled back before * the inode could be updated. */ - hadchanges = 0; + /* + * Restore the link count for every jaddref present. 
+ */ + LIST_FOREACH_SAFE(jaddref, &inodedep->id_jaddrefhd, ja_inodeps, tmp) { + hadchanges = 1; + if ((jaddref->ja_state & UNDONE) == 0) + continue; + if (fstype == UFS1) + dp1->di_nlink++; + else + dp2->di_nlink++; + jaddref->ja_state &= ~UNDONE; + jaddref->ja_state |= ATTACHED; + free_jaddref(jaddref); + } for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) { nextadp = TAILQ_NEXT(adp, ad_next); if (adp->ad_state & ATTACHED) panic("handle_written_inodeblock: new entry"); if (fstype == UFS1) { - if (adp->ad_lbn < NDADDR) { - if (dp1->di_db[adp->ad_lbn]!=adp->ad_oldblkno) + if (adp->ad_offset < NDADDR) { + if (dp1->di_db[adp->ad_offset]!=adp->ad_oldblkno) panic("%s %s #%jd mismatch %d != %jd", "handle_written_inodeblock:", "direct pointer", - (intmax_t)adp->ad_lbn, - dp1->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, + dp1->di_db[adp->ad_offset], (intmax_t)adp->ad_oldblkno); - dp1->di_db[adp->ad_lbn] = adp->ad_newblkno; + dp1->di_db[adp->ad_offset] = adp->ad_newblkno; } else { - if (dp1->di_ib[adp->ad_lbn - NDADDR] != 0) + if (dp1->di_ib[adp->ad_offset - NDADDR] != 0) panic("%s: %s #%jd allocated as %d", "handle_written_inodeblock", "indirect pointer", - (intmax_t)adp->ad_lbn - NDADDR, - dp1->di_ib[adp->ad_lbn - NDADDR]); - dp1->di_ib[adp->ad_lbn - NDADDR] = + (intmax_t)adp->ad_offset - NDADDR, + dp1->di_ib[adp->ad_offset - NDADDR]); + dp1->di_ib[adp->ad_offset - NDADDR] = adp->ad_newblkno; } } else { - if (adp->ad_lbn < NDADDR) { - if (dp2->di_db[adp->ad_lbn]!=adp->ad_oldblkno) + if (adp->ad_offset < NDADDR) { + if (dp2->di_db[adp->ad_offset]!=adp->ad_oldblkno) panic("%s: %s #%jd %s %jd != %jd", "handle_written_inodeblock", "direct pointer", - (intmax_t)adp->ad_lbn, "mismatch", - (intmax_t)dp2->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, "mismatch", + (intmax_t)dp2->di_db[adp->ad_offset], (intmax_t)adp->ad_oldblkno); - dp2->di_db[adp->ad_lbn] = adp->ad_newblkno; + dp2->di_db[adp->ad_offset] = adp->ad_newblkno; } else { - if 
(dp2->di_ib[adp->ad_lbn - NDADDR] != 0) + if (dp2->di_ib[adp->ad_offset - NDADDR] != 0) panic("%s: %s #%jd allocated as %jd", "handle_written_inodeblock", "indirect pointer", - (intmax_t)adp->ad_lbn - NDADDR, + (intmax_t)adp->ad_offset - NDADDR, (intmax_t) - dp2->di_ib[adp->ad_lbn - NDADDR]); - dp2->di_ib[adp->ad_lbn - NDADDR] = + dp2->di_ib[adp->ad_offset - NDADDR]); + dp2->di_ib[adp->ad_offset - NDADDR] = adp->ad_newblkno; } } @@ -4595,13 +7670,13 @@ handle_written_inodeblock(inodedep, bp) nextadp = TAILQ_NEXT(adp, ad_next); if (adp->ad_state & ATTACHED) panic("handle_written_inodeblock: new entry"); - if (dp2->di_extb[adp->ad_lbn] != adp->ad_oldblkno) + if (dp2->di_extb[adp->ad_offset] != adp->ad_oldblkno) panic("%s: direct pointers #%jd %s %jd != %jd", "handle_written_inodeblock", - (intmax_t)adp->ad_lbn, "mismatch", - (intmax_t)dp2->di_extb[adp->ad_lbn], + (intmax_t)adp->ad_offset, "mismatch", + (intmax_t)dp2->di_extb[adp->ad_offset], (intmax_t)adp->ad_oldblkno); - dp2->di_extb[adp->ad_lbn] = adp->ad_newblkno; + dp2->di_extb[adp->ad_offset] = adp->ad_newblkno; adp->ad_state &= ~UNDONE; adp->ad_state |= ATTACHED; hadchanges = 1; @@ -4641,9 +7716,9 @@ handle_written_inodeblock(inodedep, bp) * Process any allocdirects that completed during the update. */ if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL) - handle_allocdirect_partdone(adp); + handle_allocdirect_partdone(adp, &wkhd); if ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL) - handle_allocdirect_partdone(adp); + handle_allocdirect_partdone(adp, &wkhd); /* * Process deallocations that were held pending until the * inode had been written to disk. Freeing of the inode @@ -4651,55 +7726,25 @@ handle_written_inodeblock(inodedep, bp) * avoid creation of new triples * before the old ones have been deleted. 
*/ - filefree = NULL; - while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { - WORKLIST_REMOVE(wk); - switch (wk->wk_type) { +bufwait: + freefile = handle_bufwait(inodedep, NULL); + if (freefile && !LIST_EMPTY(&wkhd)) { + WORKLIST_INSERT(&wkhd, &freefile->fx_list); + freefile = NULL; + } + /* + * Move rolled forward dependency completions to the bufwait list + * now that those that were already written have been processed. + */ + if (!LIST_EMPTY(&wkhd) && hadchanges == 0) + panic("handle_written_inodeblock: bufwait but no changes"); + jwork_move(D_INODEDEP, __LINE__, &inodedep->id_bufwait, &wkhd); - case D_FREEFILE: - /* - * We defer adding filefree to the worklist until - * all other additions have been made to ensure - * that it will be done after all the old blocks - * have been freed. - */ - if (filefree != NULL) - panic("handle_written_inodeblock: filefree"); - filefree = wk; - continue; - - case D_MKDIR: - handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT); - continue; - - case D_DIRADD: - diradd_inode_written(WK_DIRADD(wk), inodedep); - continue; - - case D_FREEBLKS: - wk->wk_state |= COMPLETE; - if ((wk->wk_state & ALLCOMPLETE) != ALLCOMPLETE) - continue; - /* -- fall through -- */ - case D_FREEFRAG: - case D_DIRREM: - add_to_worklist(wk); - continue; - - case D_NEWDIRBLK: - free_newdirblk(WK_NEWDIRBLK(wk)); - continue; - - default: - panic("handle_written_inodeblock: Unknown type %s", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ - } - } - if (filefree != NULL) { + if (freefile != NULL) { if (free_inodedep(inodedep) == 0) - panic("handle_written_inodeblock: live inodedep"); - add_to_worklist(filefree); + panic("handle_written_inodeblock: live inodedep %p", + inodedep); + add_to_worklist(&freefile->fx_list); return (0); } @@ -4707,12 +7752,85 @@ handle_written_inodeblock(inodedep, bp) * If no outstanding dependencies, free it. 
*/ if (free_inodedep(inodedep) || - (TAILQ_FIRST(&inodedep->id_inoupdt) == 0 && - TAILQ_FIRST(&inodedep->id_extupdt) == 0)) + (LIST_FIRST(&inodedep->id_jaddrefhd) == 0 && + TAILQ_FIRST(&inodedep->id_inoupdt) == 0 && + TAILQ_FIRST(&inodedep->id_extupdt) == 0 && + LIST_FIRST(&inodedep->id_bufwait) == 0)) return (0); return (hadchanges); } +static int +handle_written_indirdep(indirdep, bp, bpp) + struct indirdep *indirdep; + struct buf *bp; + struct buf **bpp; +{ + struct allocindir *aip; + int chgs; + + if (indirdep->ir_state & GOINGAWAY) + panic("disk_write_complete: indirdep gone"); + chgs = 0; + /* + * If there were rollbacks revert them here. + */ + if (indirdep->ir_saveddata) { + bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount); + free(indirdep->ir_saveddata, M_INDIRDEP); + indirdep->ir_saveddata = 0; + chgs = 1; + } + indirdep->ir_state &= ~UNDONE; + indirdep->ir_state |= ATTACHED; + /* + * Move allocindirs with written pointers to the completehd if + * the indirdep's pointer is not yet written. Otherwise + * free them here. + */ + while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0) { + LIST_REMOVE(aip, ai_next); + if ((indirdep->ir_state & DEPCOMPLETE) == 0) { + LIST_INSERT_HEAD(&indirdep->ir_completehd, aip, + ai_next); + continue; + } + free_newblk(&aip->ai_block); + } + /* + * Move allocindirs that have finished dependency processing from + * the done list to the write list after updating the pointers. + */ + while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { + handle_allocindir_partdone(aip); + if (aip == LIST_FIRST(&indirdep->ir_donehd)) + panic("disk_write_complete: not gone"); + chgs = 1; + } + if ((bp->b_flags & B_DELWRI) == 0) + stat_indir_blk_ptrs++; + if (chgs) + bdirty(bp); + /* + * If there are no fresh dependencies and none waiting on writes + * we can free the indirdep. The caller is responsible for + * releasing sbp when it is safe to do so.
+ */ + if ((indirdep->ir_state & DEPCOMPLETE) && + LIST_EMPTY(&indirdep->ir_deplisthd) && + LIST_EMPTY(&indirdep->ir_writehd)) { + struct buf *sbp; + sbp = indirdep->ir_savebp; + sbp->b_flags |= B_INVAL | B_NOCACHE; + if (indirdep->ir_state & ONDEPLIST) + LIST_REMOVE(indirdep, ir_next); + free_indirdep(indirdep); + *bpp = sbp; + return (0); + } + return (chgs); +} + /* * Process a diradd entry after its dependent inode has been written. * This routine must be called with splbio interrupts blocked. @@ -4722,50 +7840,176 @@ diradd_inode_written(dap, inodedep) struct diradd *dap; struct inodedep *inodedep; { - struct pagedep *pagedep; dap->da_state |= COMPLETE; - if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { - if (dap->da_state & DIRCHG) - pagedep = dap->da_previous->dm_pagedep; - else - pagedep = dap->da_pagedep; - LIST_REMOVE(dap, da_pdlist); - LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); - } + complete_diradd(dap); WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); } /* - * Handle the completion of a mkdir dependency. + * Returns true if the bmsafemap will have rollbacks when written. Must + * only be called with lk and the buf lock on the cg held. */ +static int +bmsafemap_rollbacks(bmsafemap) + struct bmsafemap *bmsafemap; +{ + + return (!LIST_EMPTY(&bmsafemap->sm_jaddrefhd) | + !LIST_EMPTY(&bmsafemap->sm_jnewblkhd)); +} + +/* + * Complete a write to a bmsafemap structure. Roll forward any bitmap + * changes if it's not a background write. Set all written dependencies + * to DEPCOMPLETE and free the structure if possible. 
+ */ +static int +handle_written_bmsafemap(bmsafemap, bp) + struct bmsafemap *bmsafemap; + struct buf *bp; +{ + struct newblk *newblk; + struct inodedep *inodedep; + struct jaddref *jaddref, *jatmp; + struct jnewblk *jnewblk, *jntmp; + uint8_t *inosused; + uint8_t *blksfree; + struct cg *cgp; + struct fs *fs; + ino_t ino; + long bno; + int chgs; + int i; + + if ((bmsafemap->sm_state & IOSTARTED) == 0) + panic("initiate_write_bmsafemap: Not started\n"); + chgs = 0; + bmsafemap->sm_state &= ~IOSTARTED; + /* + * Restore unwritten inode allocation pending jaddref writes. + */ + if (!LIST_EMPTY(&bmsafemap->sm_jaddrefhd)) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + inosused = cg_inosused(cgp); + LIST_FOREACH_SAFE(jaddref, &bmsafemap->sm_jaddrefhd, + ja_bmdeps, jatmp) { + if ((jaddref->ja_state & UNDONE) == 0) + continue; + ino = jaddref->ja_ino % fs->fs_ipg; + if (isset(inosused, ino)) + panic("handle_written_bmsafemap: " + "re-allocated inode"); + if ((bp->b_xflags & BX_BKGRDMARKER) == 0) { + if ((jaddref->ja_mode & IFMT) == IFDIR) + cgp->cg_cs.cs_ndir++; + cgp->cg_cs.cs_nifree--; + setbit(inosused, ino); + chgs = 1; + } + jaddref->ja_state &= ~UNDONE; + jaddref->ja_state |= ATTACHED; + free_jaddref(jaddref); + } + } + /* + * Restore any block allocations which are pending journal writes. 
+ */ + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + blksfree = cg_blksfree(cgp); + LIST_FOREACH_SAFE(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps, + jntmp) { + if ((jnewblk->jn_state & UNDONE) == 0) + continue; + bno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; + i++) { + if (bp->b_xflags & BX_BKGRDMARKER) + break; + if ((jnewblk->jn_state & NEWBLOCK) == 0 && + isclr(blksfree, bno + i)) + panic("handle_written_bmsafemap: " + "re-allocated fragment"); + clrbit(blksfree, bno + i); + chgs = 1; + } + jnewblk->jn_state &= ~(UNDONE | NEWBLOCK); + jnewblk->jn_state |= ATTACHED; + free_jnewblk(jnewblk); + } + } + while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkwr))) { + newblk->nb_state |= DEPCOMPLETE; + newblk->nb_state &= ~ONDEPLIST; + newblk->nb_bmsafemap = NULL; + LIST_REMOVE(newblk, nb_deps); + if (newblk->nb_list.wk_type == D_ALLOCDIRECT) + handle_allocdirect_partdone( + WK_ALLOCDIRECT(&newblk->nb_list), NULL); + else if (newblk->nb_list.wk_type == D_ALLOCINDIR) + handle_allocindir_partdone( + WK_ALLOCINDIR(&newblk->nb_list)); + } + while ((inodedep = LIST_FIRST(&bmsafemap->sm_inodedepwr)) != NULL) { + inodedep->id_state |= DEPCOMPLETE; + inodedep->id_state &= ~ONDEPLIST; + LIST_REMOVE(inodedep, id_deps); + inodedep->id_bmsafemap = NULL; + } + if (LIST_EMPTY(&bmsafemap->sm_jaddrefhd) && + LIST_EMPTY(&bmsafemap->sm_jnewblkhd) && + LIST_EMPTY(&bmsafemap->sm_newblkhd) && + LIST_EMPTY(&bmsafemap->sm_inodedephd)) { + if (chgs) + bdirty(bp); + LIST_REMOVE(bmsafemap, sm_hash); + WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + return (0); + } + bdirty(bp); + return (1); +} + +/* + * Try to free a mkdir dependency. 
+ */ static void -handle_written_mkdir(mkdir, type) +complete_mkdir(mkdir) struct mkdir *mkdir; - int type; { struct diradd *dap; - struct pagedep *pagedep; - if (mkdir->md_state != type) - panic("handle_written_mkdir: bad type"); + if ((mkdir->md_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + LIST_REMOVE(mkdir, md_mkdirs); dap = mkdir->md_diradd; - dap->da_state &= ~type; - if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) + dap->da_state &= ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)); + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) { dap->da_state |= DEPCOMPLETE; - if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { - if (dap->da_state & DIRCHG) - pagedep = dap->da_previous->dm_pagedep; - else - pagedep = dap->da_pagedep; - LIST_REMOVE(dap, da_pdlist); - LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); + complete_diradd(dap); } - LIST_REMOVE(mkdir, md_mkdirs); WORKITEM_FREE(mkdir, D_MKDIR); } /* + * Handle the completion of a mkdir dependency. + */ +static void +handle_written_mkdir(mkdir, type) + struct mkdir *mkdir; + int type; +{ + + if ((mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)) != type) + panic("handle_written_mkdir: bad type"); + mkdir->md_state |= COMPLETE; + complete_mkdir(mkdir); +} + +/* * Called from within softdep_disk_write_complete above. * A write operation was just completed. Removed inodes can * now be freed and associated block pointers may be committed. 
@@ -4790,7 +8034,10 @@ handle_written_filepage(pagedep, bp) */ while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) { LIST_REMOVE(dirrem, dm_next); + dirrem->dm_state |= COMPLETE; dirrem->dm_dirinum = pagedep->pd_ino; + KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd), + ("handle_written_filepage: Journal entries not written.")); add_to_worklist(&dirrem->dm_list); } /* @@ -4800,7 +8047,7 @@ handle_written_filepage(pagedep, bp) */ if ((pagedep->pd_state & NEWBLOCK) == 0) while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) - free_diradd(dap); + free_diradd(dap, NULL); /* * Uncommitted directory entries must be restored. */ @@ -4908,6 +8155,7 @@ softdep_update_inodeblock(ip, bp, waitfor) int waitfor; /* nonzero => update must be allowed */ { struct inodedep *inodedep; + struct jaddref *jaddref; struct worklist *wk; struct mount *mp; struct buf *ibp; @@ -4922,6 +8170,7 @@ softdep_update_inodeblock(ip, bp, waitfor) */ mp = UFSTOVFS(ip->i_ump); ACQUIRE_LOCK(&lk); +again: if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) { FREE_LOCK(&lk); if (ip->i_effnlink != ip->i_nlink) @@ -4931,6 +8180,17 @@ softdep_update_inodeblock(ip, bp, waitfor) if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink) panic("softdep_update_inodeblock: bad delta"); /* + * If we're flushing all dependencies we must also move any waiting + * for journal writes onto the bufwait list prior to I/O. + */ + if (waitfor) + LIST_FOREACH(jaddref, &inodedep->id_jaddrefhd, ja_inodeps) { + if (jaddref->ja_state & (GOINGAWAY | COMPLETE)) + continue; + jwait(&jaddref->ja_list); + goto again; + } + /* * Changes have been initiated. Anything depending on these * changes cannot occur until this inode has been written. 
*/ @@ -4945,10 +8205,12 @@ softdep_update_inodeblock(ip, bp, waitfor) */ merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt); if (!TAILQ_EMPTY(&inodedep->id_inoupdt)) - handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt)); + handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt), + NULL); merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt); if (!TAILQ_EMPTY(&inodedep->id_extupdt)) - handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt)); + handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt), + NULL); /* * Now that the inode has been pushed into the buffer, the * operations dependent on the inode being written to disk @@ -4975,7 +8237,7 @@ retry: FREE_LOCK(&lk); return; } - ibp = inodedep->id_buf; + ibp = inodedep->id_bmsafemap->sm_buf; ibp = getdirtybuf(ibp, &lk, MNT_WAIT); if (ibp == NULL) { /* @@ -5007,13 +8269,13 @@ merge_inode_lists(newlisthead, oldlisthead) newadp = TAILQ_FIRST(newlisthead); for (listadp = TAILQ_FIRST(oldlisthead); listadp && newadp;) { - if (listadp->ad_lbn < newadp->ad_lbn) { + if (listadp->ad_offset < newadp->ad_offset) { listadp = TAILQ_NEXT(listadp, ad_next); continue; } TAILQ_REMOVE(newlisthead, newadp, ad_next); TAILQ_INSERT_BEFORE(listadp, newadp, ad_next); - if (listadp->ad_lbn == newadp->ad_lbn) { + if (listadp->ad_offset == newadp->ad_offset) { allocdirect_merge(oldlisthead, newadp, listadp); listadp = newadp; @@ -5057,12 +8319,11 @@ softdep_fsync(vp) return (0); } if (!LIST_EMPTY(&inodedep->id_inowait) || - !LIST_EMPTY(&inodedep->id_bufwait) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || !TAILQ_EMPTY(&inodedep->id_inoupdt) || !TAILQ_EMPTY(&inodedep->id_newinoupdt)) - panic("softdep_fsync: pending ops"); + panic("softdep_fsync: pending ops %p", inodedep); for (error = 0, flushparent = 0; ; ) { if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL) break; @@ -5254,8 +8515,8 @@ int softdep_sync_metadata(struct vnode *vp) { struct 
pagedep *pagedep; - struct allocdirect *adp; struct allocindir *aip; + struct newblk *newblk; struct buf *bp, *nbp; struct worklist *wk; struct bufobj *bo; @@ -5319,27 +8580,15 @@ loop: switch (wk->wk_type) { case D_ALLOCDIRECT: - adp = WK_ALLOCDIRECT(wk); - if (adp->ad_state & DEPCOMPLETE) - continue; - nbp = adp->ad_buf; - nbp = getdirtybuf(nbp, &lk, waitfor); - if (nbp == NULL) - continue; - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = bwrite(nbp)) != 0) { - break; + case D_ALLOCINDIR: + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk != NULL) { + jwait(&newblk->nb_jnewblk->jn_list); + goto restart; } - ACQUIRE_LOCK(&lk); - continue; - - case D_ALLOCINDIR: - aip = WK_ALLOCINDIR(wk); - if (aip->ai_state & DEPCOMPLETE) + if (newblk->nb_state & DEPCOMPLETE) continue; - nbp = aip->ai_buf; + nbp = newblk->nb_bmsafemap->sm_buf; nbp = getdirtybuf(nbp, &lk, waitfor); if (nbp == NULL) continue; @@ -5355,10 +8604,16 @@ loop: case D_INDIRDEP: restart: - LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) { - if (aip->ai_state & DEPCOMPLETE) + LIST_FOREACH(aip, + &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) { + newblk = (struct newblk *)aip; + if (newblk->nb_jnewblk != NULL) { + jwait(&newblk->nb_jnewblk->jn_list); + goto restart; + } + if (newblk->nb_state & DEPCOMPLETE) continue; - nbp = aip->ai_buf; + nbp = newblk->nb_bmsafemap->sm_buf; nbp = getdirtybuf(nbp, &lk, MNT_WAIT); if (nbp == NULL) goto restart; @@ -5489,7 +8744,8 @@ loop: BO_LOCK(bo); drain_output(vp); BO_UNLOCK(bo); - return (0); + return ffs_update(vp, 1); + /* return (0); */ } /* @@ -5502,7 +8758,9 @@ flush_inodedep_deps(mp, ino) ino_t ino; { struct inodedep *inodedep; + struct jaddref *jaddref; int error, waitfor; + int loops = 0; /* * This work is done in two passes. 
The first pass grabs most @@ -5522,8 +8780,17 @@ flush_inodedep_deps(mp, ino) return (error); FREE_LOCK(&lk); ACQUIRE_LOCK(&lk); +restart: if (inodedep_lookup(mp, ino, 0, &inodedep) == 0) return (0); + LIST_FOREACH(jaddref, &inodedep->id_jaddrefhd, ja_inodeps) { + if (jaddref->ja_state & (GOINGAWAY | COMPLETE)) + continue; + if (++loops > 20) + panic("stuck jaddref: %p\n", jaddref); + jwait(&jaddref->ja_list); + goto restart; + } if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) || flush_deplist(&inodedep->id_newinoupdt, waitfor, &error) || flush_deplist(&inodedep->id_extupdt, waitfor, &error) || @@ -5555,13 +8822,19 @@ flush_deplist(listhead, waitfor, errorp) int *errorp; { struct allocdirect *adp; + struct newblk *newblk; struct buf *bp; mtx_assert(&lk, MA_OWNED); TAILQ_FOREACH(adp, listhead, ad_next) { - if (adp->ad_state & DEPCOMPLETE) + newblk = (struct newblk *)adp; + if (newblk->nb_jnewblk != NULL) { + jwait(&newblk->nb_jnewblk->jn_list); + return (1); + } + if (newblk->nb_state & DEPCOMPLETE) continue; - bp = adp->ad_buf; + bp = newblk->nb_bmsafemap->sm_buf; bp = getdirtybuf(bp, &lk, waitfor); if (bp == NULL) { if (waitfor == MNT_NOWAIT) @@ -5592,6 +8865,7 @@ flush_pagedep_deps(pvp, mp, diraddhdp) struct diraddhd *diraddhdp; { struct inodedep *inodedep; + struct jaddref *jaddref; struct ufsmount *ump; struct diradd *dap; struct vnode *vp; @@ -5602,6 +8876,7 @@ flush_pagedep_deps(pvp, mp, diraddhdp) struct worklist *wk; ump = VFSTOUFS(mp); +restart: while ((dap = LIST_FIRST(diraddhdp)) != NULL) { /* * Flush ourselves if this directory entry @@ -5633,6 +8908,18 @@ flush_pagedep_deps(pvp, mp, diraddhdp) * happen at most once. */ inum = dap->da_newinum; + if (inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep) == 0) + panic("flush_pagedep_deps: lost inode1"); + /* + * Wait for any pending journal adds to complete so we don't + * cause rollbacks while syncing. 
+ */ + LIST_FOREACH(jaddref, &inodedep->id_jaddrefhd, ja_inodeps) { + if (jaddref->ja_state & (GOINGAWAY | COMPLETE)) + continue; + jwait(&jaddref->ja_list); + goto restart; + } if (dap->da_state & MKDIR_BODY) { FREE_LOCK(&lk); if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, @@ -5690,6 +8977,13 @@ flush_pagedep_deps(pvp, mp, diraddhdp) break; } BO_UNLOCK(bo); + /* + * We have to wait for the direct pointers to point + * at the newdirblk before the dependency will go + * away. + */ + if (dap == LIST_FIRST(diraddhdp)) + ffs_update(vp, 1); vput(vp); if (error != 0) break; /* Flushing of first block failed */ @@ -5720,7 +9014,7 @@ retry: * push them to disk. */ if ((inodedep->id_state & DEPCOMPLETE) == 0) { - bp = inodedep->id_buf; + bp = inodedep->id_bmsafemap->sm_buf; bp = getdirtybuf(bp, &lk, MNT_WAIT); if (bp == NULL) goto retry; @@ -5749,8 +9043,11 @@ retry: * If we have failed to get rid of all the dependencies * then something is seriously wrong. */ - if (dap == LIST_FIRST(diraddhdp)) - panic("flush_pagedep_deps: flush failed"); + if (dap == LIST_FIRST(diraddhdp)) { + inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep); + panic("flush_pagedep_deps: failed to flush " + "inodep %p ino %d dap %p", inodedep, inum, dap); + } } if (error) ACQUIRE_LOCK(&lk); @@ -6100,10 +9397,13 @@ softdep_count_dependencies(bp, wantcount) int wantcount; { struct worklist *wk; + struct bmsafemap *bmsafemap; struct inodedep *inodedep; struct indirdep *indirdep; + struct freeblks *freeblks; struct allocindir *aip; struct pagedep *pagedep; + struct dirrem *dirrem; struct diradd *dap; int i, retval; @@ -6132,6 +9432,12 @@ softdep_count_dependencies(bp, wantcount) if (!wantcount) goto out; } + if (LIST_FIRST(&inodedep->id_jaddrefhd)) { + /* Add reference dependency. 
*/ + retval += 1; + if (!wantcount) + goto out; + } continue; case D_INDIRDEP: @@ -6147,6 +9453,14 @@ softdep_count_dependencies(bp, wantcount) case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); + LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) { + if (LIST_FIRST(&dirrem->dm_jremrefhd)) { + /* Journal remove ref dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + } for (i = 0; i < DAHASHSZ; i++) { LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { @@ -6159,14 +9473,43 @@ softdep_count_dependencies(bp, wantcount) continue; case D_BMSAFEMAP: + bmsafemap = WK_BMSAFEMAP(wk); + if (LIST_FIRST(&bmsafemap->sm_jaddrefhd)) { + /* Add reference dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd)) { + /* Allocate block dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_FREEBLKS: + freeblks = WK_FREEBLKS(wk); + if (LIST_FIRST(&freeblks->fb_jfreeblkhd)) { + /* Freeblk journal dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_FREEWORK: + case D_FREEDEP: + case D_JSEGDEP: case D_ALLOCDIRECT: case D_ALLOCINDIR: case D_MKDIR: + case D_JSEG: /* never a dependency on these blocks */ continue; default: - panic("softdep_check_for_rollback: Unexpected type %s", + panic("softdep_count_dependencies: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } @@ -6382,6 +9725,43 @@ softdep_error(func, error) #ifdef DDB +static void +inodedep_print(struct inodedep *inodedep, int verbose) +{ + db_printf("%p fs %p st %x ino %jd inoblk %jd delta %d saveino %p\n", + inodedep, inodedep->id_fs, inodedep->id_state, + (intmax_t)inodedep->id_ino, + (intmax_t)fsbtodb(inodedep->id_fs, + ino_to_fsba(inodedep->id_fs, inodedep->id_ino)), + inodedep->id_nlinkdelta, inodedep->id_savedino1); + + if (verbose == 0) + return; + + db_printf("\tpendinghd %p, bufwait %p, inowait %p, jaddrefhd %p, " + "mkdiradd %p\n", + LIST_FIRST(&inodedep->id_pendinghd), + 
LIST_FIRST(&inodedep->id_bufwait), + LIST_FIRST(&inodedep->id_inowait), + LIST_FIRST(&inodedep->id_jaddrefhd), + inodedep->id_mkdiradd); + db_printf("\tinoupdt %p, newinoupdt %p, extupdt %p, newextupdt %p\n", + TAILQ_FIRST(&inodedep->id_inoupdt), + TAILQ_FIRST(&inodedep->id_newinoupdt), + TAILQ_FIRST(&inodedep->id_extupdt), + TAILQ_FIRST(&inodedep->id_newextupdt)); +} + +DB_SHOW_COMMAND(inodedep, db_show_inodedep) +{ + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + inodedep_print((struct inodedep*)addr, 1); +} + DB_SHOW_COMMAND(inodedeps, db_show_inodedeps) { struct inodedep_hashhead *inodedephd; @@ -6395,15 +9775,62 @@ DB_SHOW_COMMAND(inodedeps, db_show_inodedeps) LIST_FOREACH(inodedep, inodedephd, id_hash) { if (fs != NULL && fs != inodedep->id_fs) continue; - db_printf("%p fs %p st %x ino %jd inoblk %jd\n", - inodedep, inodedep->id_fs, inodedep->id_state, - (intmax_t)inodedep->id_ino, - (intmax_t)fsbtodb(inodedep->id_fs, - ino_to_fsba(inodedep->id_fs, inodedep->id_ino))); + inodedep_print(inodedep, 0); } } } +DB_SHOW_COMMAND(worklist, db_show_worklist) +{ + struct worklist *wk; + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + wk = (struct worklist *)addr; + printf("worklist: %p type %s state 0x%X\n", + wk, TYPENAME(wk->wk_type), wk->wk_state); +} + +DB_SHOW_COMMAND(workhead, db_show_workhead) +{ + struct workhead *wkhd; + struct worklist *wk; + int i; + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + wkhd = (struct workhead *)addr; + wk = LIST_FIRST(wkhd); + for (i = 0; i < 100 && wk != NULL; i++, wk = LIST_NEXT(wk, wk_list)) + db_printf("worklist: %p type %s state 0x%X", + wk, TYPENAME(wk->wk_type), wk->wk_state); + if (i == 100) + db_printf("workhead overflow"); + printf("\n"); +} + + +DB_SHOW_COMMAND(mkdirs, db_show_mkdirs) +{ + struct jaddref *jaddref; + struct diradd *diradd; + struct mkdir *mkdir; + + LIST_FOREACH(mkdir, &mkdirlisthd, md_mkdirs) { + diradd = 
mkdir->md_diradd; + db_printf("mkdir: %p state 0x%X dap %p state 0x%X", + mkdir, mkdir->md_state, diradd, diradd->da_state); + if ((jaddref = mkdir->md_jaddref) != NULL) + db_printf(" jaddref %p jaddref state 0x%X", + jaddref, jaddref->ja_state); + db_printf("\n"); + } +} + #endif /* DDB */ #endif /* SOFTUPDATES */ Index: /usr/src/sys/ufs/ffs/ffs_alloc.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_alloc.c (revision 200709) +++ /usr/src/sys/ufs/ffs/ffs_alloc.c (working copy) @@ -89,23 +89,23 @@ __FBSDID("$FreeBSD$"); #include typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref, - int size); + int size, int rsize); -static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int); +static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int, int); static ufs2_daddr_t - ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t); + ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t, int); #ifdef INVARIANTS static int ffs_checkblk(struct inode *, ufs2_daddr_t, long); #endif -static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int); -static void ffs_clusteracct(struct ufsmount *, struct fs *, struct cg *, - ufs1_daddr_t, int); +static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int, + int); static ino_t ffs_dirpref(struct inode *); static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int); static void ffs_fserr(struct fs *, ino_t, char *); static ufs2_daddr_t ffs_hashalloc - (struct inode *, int, ufs2_daddr_t, int, allocfcn_t *); -static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int); + (struct inode *, int, ufs2_daddr_t, int, int, allocfcn_t *); +static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int, + int); static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int); static int ffs_reallocblks_ufs1(struct vop_reallocblks_args *); static int 
ffs_reallocblks_ufs2(struct vop_reallocblks_args *); @@ -182,7 +182,7 @@ retry: cg = ino_to_cg(fs, ip->i_number); else cg = dtog(fs, bpref); - bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg); + bno = ffs_hashalloc(ip, cg, bpref, size, size, ffs_alloccg); if (bno > 0) { delta = btodb(size); if (ip->i_flag & IN_SPACECOUNTED) { @@ -380,16 +380,12 @@ retry: panic("ffs_realloccg: bad optim"); /* NOTREACHED */ } - bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg); + bno = ffs_hashalloc(ip, cg, bpref, request, nsize, ffs_alloccg); if (bno > 0) { bp->b_blkno = fsbtodb(fs, bno); if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize, - ip->i_number); - if (nsize < request) - ffs_blkfree(ump, fs, ip->i_devvp, - bno + numfrags(fs, nsize), - (long)(request - nsize), ip->i_number); + ip->i_number, NULL); delta = btodb(nsize - osize); if (ip->i_flag & IN_SPACECOUNTED) { UFS_LOCK(ump); @@ -493,7 +489,7 @@ ffs_reallocblks_ufs1(ap) struct fs *fs; struct inode *ip; struct vnode *vp; - struct buf *sbp, *ebp; + struct buf *sbp, *ebp, *bp; ufs1_daddr_t *bap, *sbap, *ebap = 0; struct cluster_save *buflist; struct ufsmount *ump; @@ -501,7 +497,7 @@ ffs_reallocblks_ufs1(ap) ufs1_daddr_t soff, newblk, blkno; ufs2_daddr_t pref; struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; - int i, len, start_lvl, end_lvl, ssize; + int i, len, start_lvl, end_lvl, ssize, lvl; vp = ap->a_vp; ip = VTOI(vp); @@ -578,7 +574,7 @@ ffs_reallocblks_ufs1(ap) * Search the block map looking for an allocation of the desired size. 
*/ if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, - len, ffs_clusteralloc)) == 0) { + len, len, ffs_clusteralloc)) == 0) { UFS_UNLOCK(ump); goto fail; } @@ -594,11 +590,17 @@ ffs_reallocblks_ufs1(ap) printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number, (intmax_t)start_lbn, (intmax_t)end_lbn); #endif + idp = start_ap; + bp = sbp; blkno = newblk; + lvl = start_lvl - 1; for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { if (i == ssize) { bap = ebap; - soff = -i; + idp = end_ap; + bp = ebp; + lvl = end_lvl - 1; + idp[lvl].in_off = 0; } #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -614,12 +616,13 @@ ffs_reallocblks_ufs1(ap) if (DOINGSOFTDEP(vp)) { if (sbap == &ip->i_din1->di_db[0] && i < ssize) softdep_setup_allocdirect(ip, start_lbn + i, - blkno, *bap, fs->fs_bsize, fs->fs_bsize, - buflist->bs_children[i]); + start_lbn + i, blkno, *bap, fs->fs_bsize, + fs->fs_bsize, buflist->bs_children[i]); else - softdep_setup_allocindir_page(ip, start_lbn + i, - i < ssize ? 
sbp : ebp, soff + i, blkno, - *bap, buflist->bs_children[i]); + softdep_setup_allocindir_page(ip, bp, idp, + lvl, start_lbn + i, blkno, *bap, + buflist->bs_children[i]); + idp[lvl].in_off++; } *bap++ = blkno; } @@ -664,7 +667,7 @@ ffs_reallocblks_ufs1(ap) if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number); + fs->fs_bsize, ip->i_number, NULL); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -702,14 +705,14 @@ ffs_reallocblks_ufs2(ap) struct fs *fs; struct inode *ip; struct vnode *vp; - struct buf *sbp, *ebp; + struct buf *sbp, *ebp, *bp; ufs2_daddr_t *bap, *sbap, *ebap = 0; struct cluster_save *buflist; struct ufsmount *ump; ufs_lbn_t start_lbn, end_lbn; ufs2_daddr_t soff, newblk, blkno, pref; struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; - int i, len, start_lvl, end_lvl, ssize; + int i, len, start_lvl, end_lvl, ssize, lvl; vp = ap->a_vp; ip = VTOI(vp); @@ -786,7 +789,7 @@ ffs_reallocblks_ufs2(ap) * Search the block map looking for an allocation of the desired size. 
*/ if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, - len, ffs_clusteralloc)) == 0) { + len, len, ffs_clusteralloc)) == 0) { UFS_UNLOCK(ump); goto fail; } @@ -802,11 +805,17 @@ ffs_reallocblks_ufs2(ap) printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number, (intmax_t)start_lbn, (intmax_t)end_lbn); #endif + idp = start_ap; + bp = sbp; blkno = newblk; + lvl = start_lvl - 1; for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { if (i == ssize) { bap = ebap; - soff = -i; + idp = end_ap; + bp = ebp; + lvl = end_lvl - 1; + idp[lvl].in_off = 0; } #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -822,12 +831,13 @@ ffs_reallocblks_ufs2(ap) if (DOINGSOFTDEP(vp)) { if (sbap == &ip->i_din2->di_db[0] && i < ssize) softdep_setup_allocdirect(ip, start_lbn + i, - blkno, *bap, fs->fs_bsize, fs->fs_bsize, - buflist->bs_children[i]); + start_lbn + i, blkno, *bap, fs->fs_bsize, + fs->fs_bsize, buflist->bs_children[i]); else - softdep_setup_allocindir_page(ip, start_lbn + i, - i < ssize ? 
sbp : ebp, soff + i, blkno, - *bap, buflist->bs_children[i]); + softdep_setup_allocindir_page(ip, bp, idp, + lvl, start_lbn + i, blkno, *bap, + buflist->bs_children[i]); + idp[lvl].in_off++; } *bap++ = blkno; } @@ -872,7 +882,7 @@ ffs_reallocblks_ufs2(ap) if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number); + fs->fs_bsize, ip->i_number, NULL); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -959,7 +969,7 @@ ffs_valloc(pvp, mode, cred, vpp) if (fs->fs_contigdirs[cg] > 0) fs->fs_contigdirs[cg]--; } - ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, + ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, (allocfcn_t *)ffs_nodealloccg); if (ino == 0) goto noinodes; @@ -1268,11 +1278,12 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap) */ /*VARARGS5*/ static ufs2_daddr_t -ffs_hashalloc(ip, cg, pref, size, allocator) +ffs_hashalloc(ip, cg, pref, size, rsize, allocator) struct inode *ip; int cg; ufs2_daddr_t pref; - int size; /* size for data blocks, mode for inodes */ + int size; /* Search size for data blocks, mode for inodes */ + int rsize; /* Real allocated size. 
*/ allocfcn_t *allocator; { struct fs *fs; @@ -1288,7 +1299,7 @@ static ufs2_daddr_t /* * 1: preferred cylinder group */ - result = (*allocator)(ip, cg, pref, size); + result = (*allocator)(ip, cg, pref, size, rsize); if (result) return (result); /* @@ -1298,7 +1309,7 @@ static ufs2_daddr_t cg += i; if (cg >= fs->fs_ncg) cg -= fs->fs_ncg; - result = (*allocator)(ip, cg, 0, size); + result = (*allocator)(ip, cg, 0, size, rsize); if (result) return (result); } @@ -1309,7 +1320,7 @@ static ufs2_daddr_t */ cg = (icg + 2) % fs->fs_ncg; for (i = 2; i < fs->fs_ncg; i++) { - result = (*allocator)(ip, cg, 0, size); + result = (*allocator)(ip, cg, 0, size, rsize); if (result) return (result); cg++; @@ -1391,7 +1402,8 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev); + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev, + frags, numfrags(fs, osize)); bdwrite(bp); return (bprev); @@ -1409,11 +1421,12 @@ fail: * and if it is, allocate it. 
*/ static ufs2_daddr_t -ffs_alloccg(ip, cg, bpref, size) +ffs_alloccg(ip, cg, bpref, size, rsize) struct inode *ip; int cg; ufs2_daddr_t bpref; int size; + int rsize; { struct fs *fs; struct cg *cgp; @@ -1441,7 +1454,7 @@ static ufs2_daddr_t cgp->cg_old_time = cgp->cg_time = time_second; if (size == fs->fs_bsize) { UFS_LOCK(ump); - blkno = ffs_alloccgblk(ip, bp, bpref); + blkno = ffs_alloccgblk(ip, bp, bpref, rsize); ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); bdwrite(bp); @@ -1465,21 +1478,14 @@ static ufs2_daddr_t if (cgp->cg_cs.cs_nbfree == 0) goto fail; UFS_LOCK(ump); - blkno = ffs_alloccgblk(ip, bp, bpref); - bno = dtogd(fs, blkno); - for (i = frags; i < fs->fs_frag; i++) - setbit(blksfree, bno + i); - i = fs->fs_frag - frags; - cgp->cg_cs.cs_nffree += i; - fs->fs_cstotal.cs_nffree += i; - fs->fs_cs(fs, cg).cs_nffree += i; - fs->fs_fmod = 1; - cgp->cg_frsum[i]++; + blkno = ffs_alloccgblk(ip, bp, bpref, rsize); ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); bdwrite(bp); return (blkno); } + KASSERT(size == rsize, + ("ffs_alloccg: size(%d) != rsize(%d)", size, rsize)); bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); if (bno < 0) goto fail; @@ -1497,7 +1503,7 @@ static ufs2_daddr_t ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno); + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, frags, 0); bdwrite(bp); return (blkno); @@ -1519,10 +1525,11 @@ fail: * blocks may be fragmented by the routine that allocates them. 
*/ static ufs2_daddr_t -ffs_alloccgblk(ip, bp, bpref) +ffs_alloccgblk(ip, bp, bpref, size) struct inode *ip; struct buf *bp; ufs2_daddr_t bpref; + int size; { struct fs *fs; struct cg *cgp; @@ -1530,6 +1537,7 @@ static ufs2_daddr_t ufs1_daddr_t bno; ufs2_daddr_t blkno; u_int8_t *blksfree; + int i; fs = ip->i_fs; ump = ip->i_ump; @@ -1557,16 +1565,32 @@ static ufs2_daddr_t gotit: blkno = fragstoblks(fs, bno); ffs_clrblock(fs, blksfree, (long)blkno); - ffs_clusteracct(ump, fs, cgp, blkno, -1); + ffs_clusteracct(fs, cgp, blkno, -1); cgp->cg_cs.cs_nbfree--; fs->fs_cstotal.cs_nbfree--; fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; fs->fs_fmod = 1; blkno = cgbase(fs, cgp->cg_cgx) + bno; + /* + * If the caller didn't want the whole block free the frags here. + */ + size = numfrags(fs, size); + if (size != fs->fs_frag) { + bno = dtogd(fs, blkno); + for (i = size; i < fs->fs_frag; i++) + setbit(blksfree, bno + i); + i = fs->fs_frag - size; + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + fs->fs_cs(fs, cgp->cg_cgx).cs_nffree += i; + fs->fs_fmod = 1; + cgp->cg_frsum[i]++; + } /* XXX Fixme. */ UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno); + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, + size, 0); UFS_LOCK(ump); return (blkno); } @@ -1579,11 +1603,12 @@ gotit: * take the first one that we find following bpref. */ static ufs2_daddr_t -ffs_clusteralloc(ip, cg, bpref, len) +ffs_clusteralloc(ip, cg, bpref, len, unused) struct inode *ip; int cg; ufs2_daddr_t bpref; int len; + int unused; { struct fs *fs; struct cg *cgp; @@ -1679,7 +1704,7 @@ static ufs2_daddr_t len = blkstofrags(fs, len); UFS_LOCK(ump); for (i = 0; i < len; i += fs->fs_frag) - if (ffs_alloccgblk(ip, bp, bno + i) != bno + i) + if (ffs_alloccgblk(ip, bp, bno + i, fs->fs_bsize) != bno + i) panic("ffs_clusteralloc: lost block"); ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); @@ -1703,11 +1728,12 @@ fail: * inode in the specified cylinder group. 
*/ static ufs2_daddr_t -ffs_nodealloccg(ip, cg, ipref, mode) +ffs_nodealloccg(ip, cg, ipref, mode, unused) struct inode *ip; int cg; ufs2_daddr_t ipref; int mode; + int unused; { struct fs *fs; struct cg *cgp; @@ -1810,28 +1836,6 @@ gotit: } /* - * check if a block is free - */ -static int -ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) -{ - - switch ((int)fs->fs_frag) { - case 8: - return (cp[h] == 0); - case 4: - return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); - case 2: - return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); - case 1: - return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); - default: - panic("ffs_isfreeblock"); - } - return (0); -} - -/* * Free a block or fragment. * * The specified block or fragment is placed back in the @@ -1839,13 +1843,14 @@ gotit: * block reassembly is checked. */ void -ffs_blkfree(ump, fs, devvp, bno, size, inum) +ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd) struct ufsmount *ump; struct fs *fs; struct vnode *devvp; ufs2_daddr_t bno; long size; ino_t inum; + struct workhead *dephd; { struct cg *cgp; struct buf *bp; @@ -1912,7 +1917,7 @@ void panic("ffs_blkfree: freeing free block"); } ffs_setblock(fs, blksfree, fragno); - ffs_clusteracct(ump, fs, cgp, fragno, 1); + ffs_clusteracct(fs, cgp, fragno, 1); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; @@ -1952,7 +1957,7 @@ void cgp->cg_cs.cs_nffree -= fs->fs_frag; fs->fs_cstotal.cs_nffree -= fs->fs_frag; fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; - ffs_clusteracct(ump, fs, cgp, fragno, 1); + ffs_clusteracct(fs, cgp, fragno, 1); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; @@ -1961,6 +1966,9 @@ void fs->fs_fmod = 1; ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); + if (UFSTOVFS(ump)->mnt_flag & MNT_SOFTDEP) + softdep_setup_blkfree(UFSTOVFS(ump), bp, bno, + numfrags(fs, size), dephd); bdwrite(bp); } @@ -2031,7 +2039,8 @@ ffs_vfree(pvp, ino, mode) return (0); } ip = VTOI(pvp); - return 
(ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode)); + return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode, + NULL)); } /* @@ -2039,12 +2048,13 @@ ffs_vfree(pvp, ino, mode) * The specified inode is placed back in the free map. */ int -ffs_freefile(ump, fs, devvp, ino, mode) +ffs_freefile(ump, fs, devvp, ino, mode, wkhd) struct ufsmount *ump; struct fs *fs; struct vnode *devvp; ino_t ino; int mode; + struct workhead *wkhd; { struct cg *cgp; struct buf *bp; @@ -2100,6 +2110,9 @@ int fs->fs_fmod = 1; ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); + if (UFSTOVFS(ump)->mnt_flag & MNT_SOFTDEP) + softdep_setup_inofree(UFSTOVFS(ump), bp, + ino + cg * fs->fs_ipg, wkhd); bdwrite(bp); return (0); } @@ -2213,101 +2226,6 @@ ffs_mapsearch(fs, cgp, bpref, allocsiz) } /* - * Update the cluster map because of an allocation or free. - * - * Cnt == 1 means free; cnt == -1 means allocating. - */ -void -ffs_clusteracct(ump, fs, cgp, blkno, cnt) - struct ufsmount *ump; - struct fs *fs; - struct cg *cgp; - ufs1_daddr_t blkno; - int cnt; -{ - int32_t *sump; - int32_t *lp; - u_char *freemapp, *mapp; - int i, start, end, forw, back, map, bit; - - mtx_assert(UFS_MTX(ump), MA_OWNED); - - if (fs->fs_contigsumsize <= 0) - return; - freemapp = cg_clustersfree(cgp); - sump = cg_clustersum(cgp); - /* - * Allocate or clear the actual block. - */ - if (cnt > 0) - setbit(freemapp, blkno); - else - clrbit(freemapp, blkno); - /* - * Find the size of the cluster going forward. - */ - start = blkno + 1; - end = start + fs->fs_contigsumsize; - if (end >= cgp->cg_nclusterblks) - end = cgp->cg_nclusterblks; - mapp = &freemapp[start / NBBY]; - map = *mapp++; - bit = 1 << (start % NBBY); - for (i = start; i < end; i++) { - if ((map & bit) == 0) - break; - if ((i & (NBBY - 1)) != (NBBY - 1)) { - bit <<= 1; - } else { - map = *mapp++; - bit = 1; - } - } - forw = i - start; - /* - * Find the size of the cluster going backward. 
- */ - start = blkno - 1; - end = start - fs->fs_contigsumsize; - if (end < 0) - end = -1; - mapp = &freemapp[start / NBBY]; - map = *mapp--; - bit = 1 << (start % NBBY); - for (i = start; i > end; i--) { - if ((map & bit) == 0) - break; - if ((i & (NBBY - 1)) != 0) { - bit >>= 1; - } else { - map = *mapp--; - bit = 1 << (NBBY - 1); - } - } - back = start - i; - /* - * Account for old cluster and the possibly new forward and - * back clusters. - */ - i = back + forw + 1; - if (i > fs->fs_contigsumsize) - i = fs->fs_contigsumsize; - sump[i] += cnt; - if (back > 0) - sump[back] -= cnt; - if (forw > 0) - sump[forw] -= cnt; - /* - * Update cluster summary information. - */ - lp = &sump[fs->fs_contigsumsize]; - for (i = fs->fs_contigsumsize; i > 0; i--) - if (*lp-- > 0) - break; - fs->fs_maxcluster[cgp->cg_cgx] = i; -} - -/* * Fserr prints the name of a filesystem with an error diagnostic. * * The form of the error message is: @@ -2505,7 +2423,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) #endif /* DEBUG */ while (cmd.size > 0) { if ((error = ffs_freefile(ump, fs, ump->um_devvp, - cmd.value, filetype))) + cmd.value, filetype, NULL))) break; cmd.size -= 1; cmd.value += 1; @@ -2533,7 +2451,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) if (blksize > blkcnt) blksize = blkcnt; ffs_blkfree(ump, fs, ump->um_devvp, blkno, - blksize * fs->fs_fsize, ROOTINO); + blksize * fs->fs_fsize, ROOTINO, NULL); blkno += blksize; blkcnt -= blksize; blksize = fs->fs_frag; Index: /usr/src/sys/ufs/ffs/ffs_extern.h =================================================================== --- /usr/src/sys/ufs/ffs/ffs_extern.h (revision 200709) +++ /usr/src/sys/ufs/ffs/ffs_extern.h (working copy) @@ -56,18 +56,20 @@ int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_st struct ucred *a_cred, int a_flags, struct buf **a_bpp); int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **); void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *, - ufs2_daddr_t, long, ino_t); + ufs2_daddr_t, long, ino_t, 
struct workhead *); ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *); ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *); int ffs_checkfreefile(struct fs *, struct vnode *, ino_t); void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); +void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int); void ffs_bdflush(struct bufobj *, struct buf *); int ffs_copyonwrite(struct vnode *, struct buf *); int ffs_flushfiles(struct mount *, int, struct thread *); void ffs_fragacct(struct fs *, int, int32_t [], int); int ffs_freefile(struct ufsmount *, struct fs *, struct vnode *, ino_t, - int); + int, struct workhead *); int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); +int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t); int ffs_mountroot(void); int ffs_reallocblks(struct vop_reallocblks_args *); @@ -108,7 +110,7 @@ void softdep_initialize(void); void softdep_uninitialize(void); int softdep_mount(struct vnode *, struct mount *, struct fs *, struct ucred *); -void softdep_move_dependencies(struct buf *, struct buf *); +int softdep_move_dependencies(struct buf *, struct buf *); int softdep_flushworklist(struct mount *, int *, struct thread *); int softdep_flushfiles(struct mount *, int, struct thread *); void softdep_update_inodeblock(struct inode *, struct buf *, int); @@ -117,15 +119,21 @@ void softdep_freefile(struct vnode *, ino_t, int); int softdep_request_cleanup(struct fs *, struct vnode *); void softdep_setup_freeblocks(struct inode *, off_t, int); void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t); -void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t); -void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t, +void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t, + int, int); +void softdep_setup_allocdirect(struct inode *, int, ufs_lbn_t, ufs2_daddr_t, 
ufs2_daddr_t, long, long, struct buf *); void softdep_setup_allocext(struct inode *, ufs_lbn_t, ufs2_daddr_t, ufs2_daddr_t, long, long, struct buf *); void softdep_setup_allocindir_meta(struct buf *, struct inode *, - struct buf *, int, ufs2_daddr_t); -void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t, - struct buf *, int, ufs2_daddr_t, ufs2_daddr_t, struct buf *); + struct buf *, struct indir *, int, ufs2_daddr_t); +void softdep_setup_allocindir_page(struct inode *, struct buf *, + struct indir *, int, ufs_lbn_t, ufs2_daddr_t, ufs2_daddr_t, + struct buf *); +void softdep_setup_blkfree(struct mount *, struct buf *, ufs2_daddr_t, int, + struct workhead *); +void softdep_setup_inofree(struct mount *, struct buf *, ino_t, + struct workhead *); void softdep_fsync_mountdev(struct vnode *); int softdep_sync_metadata(struct vnode *); int softdep_process_worklist(struct mount *, int); Index: /usr/src/sys/ufs/ffs/ffs_subr.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_subr.c (revision 200709) +++ /usr/src/sys/ufs/ffs/ffs_subr.c (working copy) @@ -37,7 +37,6 @@ __FBSDID("$FreeBSD$"); #ifndef _KERNEL #include #include -#include "fsck.h" #else #include #include @@ -223,12 +222,43 @@ ffs_isblock(fs, cp, h) mask = 0x01 << (h & 0x7); return ((cp[h >> 3] & mask) == mask); default: +#ifdef _KERNEL panic("ffs_isblock"); +#endif + break; } return (0); } /* + * check if a block is free + */ +int +ffs_isfreeblock(fs, cp, h) + struct fs *fs; + u_char *cp; + ufs1_daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0); + case 4: + return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); + case 2: + return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); + case 1: + return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); + default: +#ifdef _KERNEL + panic("ffs_isfreeblock"); +#endif + break; + } + return (0); +} + +/* * take a block out of the map */ void @@ -252,7 +282,10 @@ ffs_clrblock(fs, cp, h) cp[h >> 3] 
&= ~(0x01 << (h & 0x7)); return; default: +#ifdef _KERNEL panic("ffs_clrblock"); +#endif + break; } } @@ -281,6 +314,101 @@ ffs_setblock(fs, cp, h) cp[h >> 3] |= (0x01 << (h & 0x7)); return; default: +#ifdef _KERNEL panic("ffs_setblock"); +#endif + break; } } + +/* + * Update the cluster map because of an allocation or free. + * + * Cnt == 1 means free; cnt == -1 means allocating. + */ +void +ffs_clusteracct(fs, cgp, blkno, cnt) + struct fs *fs; + struct cg *cgp; + ufs1_daddr_t blkno; + int cnt; +{ + int32_t *sump; + int32_t *lp; + u_char *freemapp, *mapp; + int i, start, end, forw, back, map, bit; + + if (fs->fs_contigsumsize <= 0) + return; + freemapp = cg_clustersfree(cgp); + sump = cg_clustersum(cgp); + /* + * Allocate or clear the actual block. + */ + if (cnt > 0) + setbit(freemapp, blkno); + else + clrbit(freemapp, blkno); + /* + * Find the size of the cluster going forward. + */ + start = blkno + 1; + end = start + fs->fs_contigsumsize; + if (end >= cgp->cg_nclusterblks) + end = cgp->cg_nclusterblks; + mapp = &freemapp[start / NBBY]; + map = *mapp++; + bit = 1 << (start % NBBY); + for (i = start; i < end; i++) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != (NBBY - 1)) { + bit <<= 1; + } else { + map = *mapp++; + bit = 1; + } + } + forw = i - start; + /* + * Find the size of the cluster going backward. + */ + start = blkno - 1; + end = start - fs->fs_contigsumsize; + if (end < 0) + end = -1; + mapp = &freemapp[start / NBBY]; + map = *mapp--; + bit = 1 << (start % NBBY); + for (i = start; i > end; i--) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != 0) { + bit >>= 1; + } else { + map = *mapp--; + bit = 1 << (NBBY - 1); + } + } + back = start - i; + /* + * Account for old cluster and the possibly new forward and + * back clusters. 
+ */ + i = back + forw + 1; + if (i > fs->fs_contigsumsize) + i = fs->fs_contigsumsize; + sump[i] += cnt; + if (back > 0) + sump[back] -= cnt; + if (forw > 0) + sump[forw] -= cnt; + /* + * Update cluster summary information. + */ + lp = &sump[fs->fs_contigsumsize]; + for (i = fs->fs_contigsumsize; i > 0; i--) + if (*lp-- > 0) + break; + fs->fs_maxcluster[cgp->cg_cgx] = i; +} Index: /usr/src/sys/ufs/ffs/softdep.h =================================================================== --- /usr/src/sys/ufs/ffs/softdep.h (revision 200709) +++ /usr/src/sys/ufs/ffs/softdep.h (working copy) @@ -98,18 +98,20 @@ #define UNDONE 0x0002 #define COMPLETE 0x0004 #define DEPCOMPLETE 0x0008 -#define MKDIR_PARENT 0x0010 /* diradd & mkdir only */ -#define MKDIR_BODY 0x0020 /* diradd & mkdir only */ +#define MKDIR_PARENT 0x0010 /* diradd, mkdir, jaddref, jsegdep only */ +#define MKDIR_BODY 0x0020 /* diradd, mkdir, jaddref only */ #define RMDIR 0x0040 /* dirrem only */ -#define DIRCHG 0x0080 /* diradd & dirrem only */ -#define GOINGAWAY 0x0100 /* indirdep only */ -#define IOSTARTED 0x0200 /* inodedep & pagedep only */ +#define DIRCHG 0x0080 /* diradd, dirrem only */ +#define GOINGAWAY 0x0100 /* indirdep, jremref only */ +#define IOSTARTED 0x0200 /* inodedep, pagedep, bmsafemap only */ #define SPACECOUNTED 0x0400 /* inodedep only */ -#define NEWBLOCK 0x0800 /* pagedep only */ +#define NEWBLOCK 0x0800 /* pagedep, jaddref only */ #define INPROGRESS 0x1000 /* dirrem, freeblks, freefrag, freefile only */ #define UFS1FMT 0x2000 /* indirdep only */ #define EXTDATA 0x4000 /* allocdirect only */ #define ONWORKLIST 0x8000 +#define IOWAITING 0x10000 /* Thread is waiting for IO to complete. */ +#define ONDEPLIST 0x20000 /* Structure is on a dependency list. */ #define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE) @@ -135,25 +137,35 @@ * and the macros below changed to use it. 
*/ struct worklist { + LIST_ENTRY(worklist) wk_list; /* list of work requests */ struct mount *wk_mp; /* Mount we live in */ - LIST_ENTRY(worklist) wk_list; /* list of work requests */ - unsigned short wk_type; /* type of request */ - unsigned short wk_state; /* state flags */ + unsigned int wk_type:8, /* type of request */ + wk_state:24; /* state flags */ }; #define WK_DATA(wk) ((void *)(wk)) #define WK_PAGEDEP(wk) ((struct pagedep *)(wk)) #define WK_INODEDEP(wk) ((struct inodedep *)(wk)) #define WK_BMSAFEMAP(wk) ((struct bmsafemap *)(wk)) +#define WK_NEWBLK(wk) ((struct newblk *)(wk)) #define WK_ALLOCDIRECT(wk) ((struct allocdirect *)(wk)) #define WK_INDIRDEP(wk) ((struct indirdep *)(wk)) #define WK_ALLOCINDIR(wk) ((struct allocindir *)(wk)) #define WK_FREEFRAG(wk) ((struct freefrag *)(wk)) #define WK_FREEBLKS(wk) ((struct freeblks *)(wk)) +#define WK_FREEWORK(wk) ((struct freework *)(wk)) #define WK_FREEFILE(wk) ((struct freefile *)(wk)) #define WK_DIRADD(wk) ((struct diradd *)(wk)) #define WK_MKDIR(wk) ((struct mkdir *)(wk)) #define WK_DIRREM(wk) ((struct dirrem *)(wk)) #define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk)) +#define WK_JADDREF(wk) ((struct jaddref *)(wk)) +#define WK_JREMREF(wk) ((struct jremref *)(wk)) +#define WK_JSEGDEP(wk) ((struct jsegdep *)(wk)) +#define WK_JSEG(wk) ((struct jseg *)(wk)) +#define WK_JNEWBLK(wk) ((struct jnewblk *)(wk)) +#define WK_JFREEBLK(wk) ((struct jfreeblk *)(wk)) +#define WK_FREEDEP(wk) ((struct freedep *)(wk)) +#define WK_JFREEFRAG(wk) ((struct jfreefrag *)(wk)) /* * Various types of lists @@ -165,6 +177,13 @@ LIST_HEAD(inodedephd, inodedep); LIST_HEAD(allocindirhd, allocindir); LIST_HEAD(allocdirecthd, allocdirect); TAILQ_HEAD(allocdirectlst, allocdirect); +LIST_HEAD(indirdephd, indirdep); +LIST_HEAD(jremrefhd, jremref); +LIST_HEAD(jaddrefhd, jaddref); +LIST_HEAD(jnewblkhd, jnewblk); +LIST_HEAD(jfreeblkhd, jfreeblk); +LIST_HEAD(freeworkhd, freework); +TAILQ_HEAD(jseglst, jseg); /* * The "pagedep" structure tracks the 
various dependencies related to @@ -192,6 +211,7 @@ struct pagedep { LIST_ENTRY(pagedep) pd_hash; /* hashed lookup */ ino_t pd_ino; /* associated file */ ufs_lbn_t pd_lbn; /* block within file */ + struct newdirblk *pd_newdirblk; /* associated newdirblk if NEWBLOCK */ struct dirremhd pd_dirremhd; /* dirrem's waiting for page */ struct diraddhd pd_diraddhd[DAHASHSZ]; /* diradd dir entry updates */ struct diraddhd pd_pendinghd; /* directory entries awaiting write */ @@ -252,7 +272,9 @@ struct inodedep { ino_t id_ino; /* dependent inode */ nlink_t id_nlinkdelta; /* saved effective link count */ LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */ - struct buf *id_buf; /* related bmsafemap (if pending) */ + struct bmsafemap *id_bmsafemap; /* related bmsafemap (if pending) */ + struct diradd *id_mkdiradd; /* diradd for a mkdir. */ + struct jaddrefhd id_jaddrefhd; /* Journal add refs pending. */ long id_savedextsize; /* ext size saved during rollback */ off_t id_savedsize; /* file size saved during rollback */ struct workhead id_pendinghd; /* entries awaiting directory write */ @@ -271,23 +293,6 @@ struct inodedep { #define id_savedino2 id_un.idu_savedino2 /* - * A "newblk" structure is attached to a bmsafemap structure when a block - * or fragment is allocated from a cylinder group. Its state is set to - * DEPCOMPLETE when its cylinder group map is written. It is consumed by - * an associated allocdirect or allocindir allocation which will attach - * themselves to the bmsafemap structure if the newblk's DEPCOMPLETE flag - * is not set (i.e., its cylinder group map has not been written). 
- */ -struct newblk { - LIST_ENTRY(newblk) nb_hash; /* hashed lookup */ - struct fs *nb_fs; /* associated filesystem */ - int nb_state; /* state of bitmap dependency */ - ufs2_daddr_t nb_newblkno; /* allocated block number */ - LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblk's */ - struct bmsafemap *nb_bmsafemap; /* associated bmsafemap */ -}; - -/* * A "bmsafemap" structure maintains a list of dependency structures * that depend on the update of a particular cylinder group map. * It has lists for newblks, allocdirects, allocindirs, and inodedeps. @@ -299,14 +304,46 @@ struct inodedep { */ struct bmsafemap { struct worklist sm_list; /* cylgrp buffer */ +# define sm_state sm_list.wk_state + int sm_cg; + LIST_ENTRY(bmsafemap) sm_hash; /* Hash links. */ struct buf *sm_buf; /* associated buffer */ struct allocdirecthd sm_allocdirecthd; /* allocdirect deps */ + struct allocdirecthd sm_allocdirectwr; /* writing allocdirect deps */ struct allocindirhd sm_allocindirhd; /* allocindir deps */ + struct allocindirhd sm_allocindirwr; /* writing allocindir deps */ struct inodedephd sm_inodedephd; /* inodedep deps */ + struct inodedephd sm_inodedepwr; /* writing inodedep deps */ struct newblkhd sm_newblkhd; /* newblk deps */ + struct newblkhd sm_newblkwr; /* writing newblk deps */ + struct jaddrefhd sm_jaddrefhd; /* Pending inode allocations. */ + struct jnewblkhd sm_jnewblkhd; /* Pending block allocations. */ }; /* + * A "newblk" structure is attached to a bmsafemap structure when a block + * or fragment is allocated from a cylinder group. Its state is set to + * DEPCOMPLETE when its cylinder group map is written. It is converted to + * an allocdirect or allocindir allocation once the allocator calls the + * appropriate setup function. 
+ */ +struct newblk { + struct worklist nb_list; +# define nb_state nb_list.wk_state + LIST_ENTRY(newblk) nb_hash; /* hashed lookup */ + LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblks */ + struct jnewblk *nb_jnewblk; /* New block journal entry. */ + struct bmsafemap *nb_bmsafemap;/* cylgrp dep (if pending) */ + struct freefrag *nb_freefrag; /* fragment to be freed (if any) */ + struct indirdephd nb_indirdeps; /* Children indirect blocks. */ + struct workhead nb_newdirblk; /* dir block to notify when written */ + struct workhead nb_jwork; /* Journal work pending. */ + ufs2_daddr_t nb_newblkno; /* new value of block pointer */ + ufs2_daddr_t nb_oldblkno; /* old value of block pointer */ + int nb_offset; /* Pointer offset in parent. */ +}; + +/* * An "allocdirect" structure is attached to an "inodedep" when a new block * or fragment is allocated and pointed to by the inode described by * "inodedep". The worklist is linked to the buffer that holds the block. @@ -334,20 +371,18 @@ struct bmsafemap { * and inodedep->id_pendinghd lists. 
*/ struct allocdirect { - struct worklist ad_list; /* buffer holding block */ -# define ad_state ad_list.wk_state /* block pointer state */ + struct newblk ad_block; /* Common block logic */ +# define ad_state ad_block.nb_list.wk_state /* block pointer state */ TAILQ_ENTRY(allocdirect) ad_next; /* inodedep's list of allocdirect's */ - ufs_lbn_t ad_lbn; /* block within file */ - ufs2_daddr_t ad_newblkno; /* new value of block pointer */ - ufs2_daddr_t ad_oldblkno; /* old value of block pointer */ + struct inodedep *ad_inodedep; /* associated inodedep */ long ad_newsize; /* size of new block */ long ad_oldsize; /* size of old block */ - LIST_ENTRY(allocdirect) ad_deps; /* bmsafemap's list of allocdirect's */ - struct buf *ad_buf; /* cylgrp buffer (if pending) */ - struct inodedep *ad_inodedep; /* associated inodedep */ - struct freefrag *ad_freefrag; /* fragment to be freed (if any) */ - struct workhead ad_newdirblk; /* dir block to notify when written */ }; +#define ad_newblkno ad_block.nb_newblkno +#define ad_oldblkno ad_block.nb_oldblkno +#define ad_offset ad_block.nb_offset +#define ad_freefrag ad_block.nb_freefrag +#define ad_newdirblk ad_block.nb_newdirblk /* * A single "indirdep" structure manages all allocation dependencies for @@ -369,10 +404,14 @@ struct allocdirect { struct indirdep { struct worklist ir_list; /* buffer holding indirect block */ # define ir_state ir_list.wk_state /* indirect block pointer state */ - caddr_t ir_saveddata; /* buffer cache contents */ + LIST_ENTRY(indirdep) ir_next; /* alloc{direct,indir} list */ + caddr_t ir_saveddata; /* buffer cache contents */ struct buf *ir_savebp; /* buffer holding safe copy */ + struct allocindirhd ir_completehd; /* waiting for indirdep complete */ + struct allocindirhd ir_writehd; /* Waiting for the pointer write. 
*/ struct allocindirhd ir_donehd; /* done waiting to update safecopy */ struct allocindirhd ir_deplisthd; /* allocindir deps for this block */ + struct workhead ir_jwork; /* Journal work pending. */ }; /* @@ -389,31 +428,38 @@ struct indirdep { * can then be freed as it is no longer applicable. */ struct allocindir { - struct worklist ai_list; /* buffer holding indirect block */ -# define ai_state ai_list.wk_state /* indirect block pointer state */ + struct newblk ai_block; /* Common block area */ +# define ai_state ai_block.nb_list.wk_state /* indirect pointer state */ LIST_ENTRY(allocindir) ai_next; /* indirdep's list of allocindir's */ - int ai_offset; /* pointer offset in indirect block */ - ufs2_daddr_t ai_newblkno; /* new block pointer value */ - ufs2_daddr_t ai_oldblkno; /* old block pointer value */ - struct freefrag *ai_freefrag; /* block to be freed when complete */ struct indirdep *ai_indirdep; /* address of associated indirdep */ - LIST_ENTRY(allocindir) ai_deps; /* bmsafemap's list of allocindir's */ - struct buf *ai_buf; /* cylgrp buffer (if pending) */ }; +#define ai_newblkno ai_block.nb_newblkno +#define ai_oldblkno ai_block.nb_oldblkno +#define ai_offset ai_block.nb_offset +#define ai_freefrag ai_block.nb_freefrag /* + * The allblk union is used to size the newblk structure on allocation so + * that it may be any one of three types. + */ +union allblk { + struct allocindir ab_allocindir; + struct allocdirect ab_allocdirect; + struct newblk ab_newblk; +}; + +/* * A "freefrag" structure is attached to an "inodedep" when a previously * allocated fragment is replaced with a larger fragment, rather than extended. * The "freefrag" structure is constructed and attached when the replacement * block is first allocated. It is processed after the inode claiming the - * bigger block that replaces it has been written to disk. Note that the - * ff_state field is is used to store the uid, so may lose data. 
However, - * the uid is used only in printing an error message, so is not critical. - * Keeping it in a short keeps the data structure down to 32 bytes. + * bigger block that replaces it has been written to disk. */ struct freefrag { struct worklist ff_list; /* id_inowait or delayed worklist */ -# define ff_state ff_list.wk_state /* owning user; should be uid_t */ +# define ff_state ff_list.wk_state + struct jfreefrag *ff_jfreefrag; /* Associated journal entry. */ + struct workhead ff_jwork; /* Journal work pending. */ ufs2_daddr_t ff_blkno; /* fragment physical block number */ long ff_fragsize; /* size of fragment being deleted */ ino_t ff_inum; /* owning inode number */ @@ -423,23 +469,60 @@ struct freefrag { * A "freeblks" structure is attached to an "inodedep" when the * corresponding file's length is reduced to zero. It records all * the information needed to free the blocks of a file after its - * zero'ed inode has been written to disk. + * zero'ed inode has been written to disk. The actual work is done + * by child freework structures which are responsible for individual + * inode pointers while freeblks is responsible for retiring the + * entire operation when it is complete and holding common members. 
*/ struct freeblks { struct worklist fb_list; /* id_inowait or delayed worklist */ # define fb_state fb_list.wk_state /* inode and dirty block state */ + struct jfreeblkhd fb_jfreeblkhd; /* Journal entries pending */ + struct workhead fb_freeworkhd; /* Work items pending */ + struct workhead fb_jwork; /* Journal work pending */ ino_t fb_previousinum; /* inode of previous owner of blocks */ uid_t fb_uid; /* uid of previous owner of blocks */ struct vnode *fb_devvp; /* filesystem device vnode */ - long fb_oldextsize; /* previous ext data size */ - off_t fb_oldsize; /* previous file size */ ufs2_daddr_t fb_chkcnt; /* used to check cnt of blks released */ - ufs2_daddr_t fb_dblks[NDADDR]; /* direct blk ptrs to deallocate */ - ufs2_daddr_t fb_iblks[NIADDR]; /* indirect blk ptrs to deallocate */ - ufs2_daddr_t fb_eblks[NXADDR]; /* indirect blk ptrs to deallocate */ + int fb_ref; /* Children outstanding. */ }; /* + * A "freework" structure handles the release of a tree of blocks or a single + * block. Each indirect block in a tree is allocated its own freework + * structure so that the indirect block may be freed only when all of its + * children are freed. In this way we enforce the rule that an allocated + * block must have a valid path to a root that is journaled. Each child + * block acquires a reference and when the ref hits zero the parent ref + * is decremented. If there is no parent the freeblks ref is decremented. + */ +struct freework { + struct worklist fw_list; +# define fw_state fw_list.wk_state + LIST_ENTRY(freework) fw_next; /* Queue for freeblks. */ + struct freeblks *fw_freeblks; /* Root of operation. */ + struct freework *fw_parent; /* Parent indirect. */ + ufs2_daddr_t fw_blkno; /* Our block #. */ + ufs_lbn_t fw_lbn; /* Original lbn before free. */ + int fw_frags; /* Number of frags. */ + int fw_ref; /* Number of children out. */ + int fw_off; /* Current working position. */ + struct workhead fw_jwork; /* Journal work pending.
*/ +}; + +/* + * A "freedep" structure is allocated to track the completion of a bitmap + * write for a freework. One freedep may cover many freed blocks so long + * as they reside in the same cylinder group. When the cg is written + * the freedep decrements the ref on the freework which may permit it + * to be freed as well. + */ +struct freedep { + struct worklist fd_list; + struct freework *fd_freework; /* Parent freework. */ +}; + +/* * A "freefile" structure is attached to an inode when its * link count is reduced to zero. It marks the inode as free in * the cylinder group map after the zero'ed inode has been written @@ -450,6 +533,7 @@ struct freefile { mode_t fx_mode; /* mode of inode */ ino_t fx_oldinum; /* inum of the unlinked file */ struct vnode *fx_devvp; /* filesystem device vnode */ + struct workhead fx_jwork; /* journal work pending. */ }; /* @@ -482,12 +566,11 @@ struct freefile { * than zero. * * The overlaying of da_pagedep and da_previous is done to keep the - * structure down to 32 bytes in size on a 32-bit machine. If a - * da_previous entry is present, the pointer to its pagedep is available - * in the associated dirrem entry. If the DIRCHG flag is set, the - * da_previous entry is valid; if not set the da_pagedep entry is valid. - * The DIRCHG flag never changes; it is set when the structure is created - * if appropriate and is never cleared. + * structure down. If a da_previous entry is present, the pointer to its + * pagedep is available in the associated dirrem entry. If the DIRCHG flag + * is set, the da_previous entry is valid; if not set the da_pagedep entry + * is valid. The DIRCHG flag never changes; it is set when the structure + * is created if appropriate and is never cleared. 
*/ struct diradd { struct worklist da_list; /* id_inowait or id_pendinghd list */ @@ -499,6 +582,7 @@ struct diradd { struct dirrem *dau_previous; /* entry being replaced in dir change */ struct pagedep *dau_pagedep; /* pagedep dependency for addition */ } da_un; + struct workhead da_jwork; /* Journal work awaiting completion. */ }; #define da_previous da_un.dau_previous #define da_pagedep da_un.dau_pagedep @@ -525,12 +609,13 @@ struct diradd { * mkdir structures that reference it. The deletion would be faster if the * diradd structure were simply augmented to have two pointers that referenced * the associated mkdir's. However, this would increase the size of the diradd - * structure from 32 to 64-bits to speed a very infrequent operation. + * structure to speed a very infrequent operation. */ struct mkdir { struct worklist md_list; /* id_inowait or buffer holding dir */ # define md_state md_list.wk_state /* type: MKDIR_PARENT or MKDIR_BODY */ struct diradd *md_diradd; /* associated diradd */ + struct jaddref *md_jaddref; /* dependent jaddref. */ struct buf *md_buf; /* MKDIR_BODY: buffer holding dir */ LIST_ENTRY(mkdir) md_mkdirs; /* list of all mkdirs */ }; @@ -542,20 +627,18 @@ LIST_HEAD(mkdirlist, mkdir) mkdirlisthd; * list of the pagedep for the directory page that contains the entry. * It is processed after the directory page with the deleted entry has * been written to disk. - * - * The overlaying of dm_pagedep and dm_dirinum is done to keep the - * structure down to 32 bytes in size on a 32-bit machine. It works - * because they are never used concurrently. */ struct dirrem { struct worklist dm_list; /* delayed worklist */ # define dm_state dm_list.wk_state /* state of the old directory entry */ LIST_ENTRY(dirrem) dm_next; /* pagedep's list of dirrem's */ + struct jremrefhd dm_jremrefhd; /* Pending remove reference deps. 
*/ ino_t dm_oldinum; /* inum of the removed dir entry */ union { struct pagedep *dmu_pagedep; /* pagedep dependency for remove */ ino_t dmu_dirinum; /* parent inode number (for rmdir) */ } dm_un; + struct workhead dm_jwork; /* Journal work awaiting completion. */ }; #define dm_pagedep dm_un.dmu_pagedep #define dm_dirinum dm_un.dmu_dirinum @@ -577,9 +660,151 @@ struct dirrem { * blocks using a similar scheme with the allocindir structures. Rather * than adding this level of complexity, we simply write those newly * allocated indirect blocks synchronously as such allocations are rare. + * In the case of a new directory the . and .. links are tracked with + * a mkdir rather than a pagedep. In this case we track the mkdir + * so it can be released when it is written. A workhead is used + * to simplify canceling a mkdir that is removed by a subsequent dirrem. */ struct newdirblk { struct worklist db_list; /* id_inowait or pg_newdirblk */ # define db_state db_list.wk_state /* unused */ struct pagedep *db_pagedep; /* associated pagedep */ + struct workhead db_mkdir; }; + +/* + * A "jaddref" structure tracks a new reference (link count) on an inode + * and prevents the link count increase and bitmap allocation until a + * journal entry can be written. Once the journal entry is written, + * the inode is put on the pendinghd of the bmsafemap and a diradd or + * mkdir entry is placed on the bufwait list of the inode. The DEPCOMPLETE + * flag is used to indicate that all of the required information for writing + * the journal entry is present. MKDIR_BODY and MKDIR_PARENT are used to + * differentiate . and .. links from regular file names. NEWBLOCK indicates + * a bitmap is still pending. If a new reference is canceled by a delete + * prior to writing the journal the jaddref write is canceled and the + * structure persists to prevent any disk-visible changes until it is + * ultimately released when the file is freed or the link is dropped again. 
+ */ +struct jaddref { + struct worklist ja_list; /* Journal pending or jseg entries. */ +# define ja_state ja_list.wk_state + union { + struct diradd *jau_diradd; /* Pending diradd. */ + struct mkdir *jau_mkdir; /* MKDIR_{PARENT,BODY} */ + } ja_un; + LIST_ENTRY(jaddref) ja_bmdeps; /* Links for bmsafemap. */ + LIST_ENTRY(jaddref) ja_inodeps; /* Links for inodedep. */ + off_t ja_diroff; /* Directory offset. */ + ino_t ja_ino; /* Inode number. */ + ino_t ja_parent; /* Parent inode number. */ + int16_t ja_nlink; /* nlink before addition. */ + uint16_t ja_mode; /* File mode, needed for IFMT. */ +}; +#define ja_diradd ja_un.jau_diradd +#define ja_mkdir ja_un.jau_mkdir + +/* + * A "jremref" structure tracks a removed reference (unlink) on an + * inode and prevents the directory remove from proceeding until the + * journal entry is written. Once the journal has been written the remove + * may proceed as normal. + */ +struct jremref { + struct worklist jr_list; /* Journal pending or jseg entries. */ +# define jr_state jr_list.wk_state + struct dirrem *jr_dirrem; /* Back pointer to dirrem. */ + LIST_ENTRY(jremref) jr_deps; /* Links for pagedep. */ + off_t jr_diroff; /* Directory offset. */ + ino_t jr_ino; /* Inode number. */ + ino_t jr_parent; /* Parent inode number. */ + int16_t jr_nlink; /* nlink before the removal. */ + uint16_t jr_mode; /* File mode, needed for IFMT. */ +}; + +/* + * A "jnewblk" structure tracks a newly allocated block or fragment and + * prevents the direct or indirect block pointer as well as the cg bitmap + * from being written until it is logged. After it is logged the jsegdep + * is attached to the allocdirect or allocindir until the operation is + * completed or reverted. If the operation is reverted prior to the journal + * write the jnewblk structure is maintained to prevent the bitmaps from + * reaching the disk.
Ultimately the jnewblk structure will be passed + * to the free routine as the in memory cg is modified back to the free + * state at which time it can be released. + */ +struct jnewblk { + struct worklist jn_list; +# define jn_state jn_list.wk_state + LIST_ENTRY(jnewblk) jn_deps; /* All jnewblks on bmsafemap */ + struct newblk *jn_newblk; + ino_t jn_ino; + ufs_lbn_t jn_lbn; + ufs2_daddr_t jn_blkno; + int jn_oldfrags; + int jn_frags; +}; + +/* + * A "jfreeblk" structure tracks the journal write for freeing a block + * or tree of blocks. The block pointer must not be cleared in the inode + * or indirect prior to the jfreeblk being written. + */ +struct jfreeblk { + struct worklist jf_list; +# define jf_state jf_list.wk_state + struct freeblks *jf_freeblks; + LIST_ENTRY(jfreeblk) jf_deps; + ino_t jf_ino; + ufs_lbn_t jf_lbn; + ufs2_daddr_t jf_blkno; + int jf_frags; +}; + +/* + * A "jfreefrag" tracks the freeing of a single block when a fragment is + * extended or an indirect page is replaced. It is not part of a larger + * freeblks operation. + */ +struct jfreefrag { + struct worklist fr_list; +# define fr_state fr_list.wk_state + struct freefrag *fr_freefrag; + ino_t fr_ino; + ufs_lbn_t fr_lbn; + ufs2_daddr_t fr_blkno; + int fr_frags; +}; + +/* + * A "jsegdep" structure tracks a single reference to a written journal + * segment so the journal space can be reclaimed when all dependencies + * have been written. + */ +struct jsegdep { + struct worklist jd_list; +# define jd_state jd_list.wk_state + struct jseg *jd_seg; + short jd_type; + short jd_line; +}; + +/* + * A "jseg" structure contains all of the journal records written in a + * single disk write. jaddref and jremref structures are linked into + * js_entries so they may be completed when the write completes. The + * js_deps array contains as many entries as there are ref counts to + * reduce the number of allocations required per journal write to one.
+ */ +struct jseg { + struct worklist js_list; /* b_deps link for journal */ + struct workhead js_entries; /* Entries awaiting write */ + TAILQ_ENTRY(jseg) js_next; + struct jblocks *js_jblocks; /* Back pointer to block/seg list */ + struct buf *js_buf; /* Buffer while unwritten */ + uint64_t js_seq; + int js_size; /* Allocated size in bytes */ + int js_cnt; /* Total items allocated */ + int js_refs; /* Count of items pending completion */ + struct jsegdep js_deps[0]; /* Dependencies for completion */ +}; Index: /usr/src/sys/ufs/ffs/ffs_balloc.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_balloc.c (revision 200709) +++ /usr/src/sys/ufs/ffs/ffs_balloc.c (working copy) @@ -138,7 +138,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse if (error) return (error); if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, nb, + softdep_setup_allocdirect(ip, nb, nb, dbtofsb(fs, bp->b_blkno), dp->di_db[nb], fs->fs_bsize, osize, bp); ip->i_size = smalllblktosize(fs, nb + 1); @@ -190,7 +190,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse if (error) return (error); if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, lbn, + softdep_setup_allocdirect(ip, lbn, lbn, dbtofsb(fs, bp->b_blkno), nb, nsize, osize, bp); } @@ -210,7 +210,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, lbn, newb, 0, + softdep_setup_allocdirect(ip, lbn, lbn, newb, 0, nsize, 0, bp); } dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); @@ -255,7 +255,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, - newb, 0, fs->fs_bsize, 0, bp); + indirs[0].in_lbn, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* @@ -305,8 +305,8 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); if 
(DOINGSOFTDEP(vp)) { - softdep_setup_allocindir_meta(nbp, ip, bp, - indirs[i - 1].in_off, nb); + softdep_setup_allocindir_meta(nbp, ip, bp, indirs, i, + nb); bdwrite(nbp); } else { /* @@ -361,8 +361,8 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) - softdep_setup_allocindir_page(ip, lbn, bp, - indirs[i].in_off, nb, 0, nbp); + softdep_setup_allocindir_page(ip, bp, indirs, i, lbn, + nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use @@ -418,6 +418,8 @@ fail: * slow, running out of disk space is not expected to be a common * occurence. The error return from fsync is ignored as we already * have an error to return to the user. + * + * XXX Still have to journal the free below */ (void) ffs_syncvnode(vp, MNT_WAIT); for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; @@ -473,7 +475,7 @@ fail: */ for (blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, - ip->i_number); + ip->i_number, NULL); } return (error); } @@ -643,7 +645,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse if (error) return (error); if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, nb, + softdep_setup_allocdirect(ip, nb, nb, dbtofsb(fs, bp->b_blkno), dp->di_db[nb], fs->fs_bsize, osize, bp); @@ -696,7 +698,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse if (error) return (error); if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, lbn, + softdep_setup_allocdirect(ip, lbn, lbn, dbtofsb(fs, bp->b_blkno), nb, nsize, osize, bp); } @@ -716,7 +718,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, lbn, newb, 0, + softdep_setup_allocdirect(ip, lbn, lbn, newb, 0, nsize, 0, bp); } dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); @@ -761,7 +763,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse vfs_bio_clrbuf(bp); if 
(DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, - newb, 0, fs->fs_bsize, 0, bp); + indirs[0].in_lbn, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* @@ -811,8 +813,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) { - softdep_setup_allocindir_meta(nbp, ip, bp, - indirs[i - 1].in_off, nb); + softdep_setup_allocindir_meta(nbp, ip, bp, indirs, i, + nb); bdwrite(nbp); } else { /* @@ -867,8 +869,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) - softdep_setup_allocindir_page(ip, lbn, bp, - indirs[i].in_off, nb, 0, nbp); + softdep_setup_allocindir_page(ip, bp, indirs, i, lbn, + nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use @@ -930,6 +932,8 @@ fail: * slow, running out of disk space is not expected to be a common * occurence. The error return from fsync is ignored as we already * have an error to return to the user. 
+ * + * XXX Still have to journal the free below */ (void) ffs_syncvnode(vp, MNT_WAIT); for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; @@ -985,7 +989,7 @@ fail: */ for (blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, - ip->i_number); + ip->i_number, NULL); } return (error); } Index: /usr/src/sys/ufs/ffs/ffs_inode.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_inode.c (revision 200709) +++ /usr/src/sys/ufs/ffs/ffs_inode.c (working copy) @@ -232,7 +232,7 @@ ffs_truncate(vp, length, flags, cred, td) if (oldblks[i] == 0) continue; ffs_blkfree(ump, fs, ip->i_devvp, oldblks[i], - sblksize(fs, osize, i), ip->i_number); + sblksize(fs, osize, i), ip->i_number, NULL); } } } @@ -336,6 +336,8 @@ ffs_truncate(vp, length, flags, cred, td) * zero'ed in case it ever becomes accessible again because * of subsequent file growth. Directories however are not * zero'ed as they should grow back initialized to empty. + * + * XXX Still need to manually journal this. 
*/ offset = blkoff(fs, length); if (offset == 0) { @@ -445,7 +447,7 @@ ffs_truncate(vp, length, flags, cred, td) if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); ffs_blkfree(ump, fs, ip->i_devvp, bn, - fs->fs_bsize, ip->i_number); + fs->fs_bsize, ip->i_number, NULL); blocksreleased += nblocks; } } @@ -464,7 +466,8 @@ ffs_truncate(vp, length, flags, cred, td) continue; DIP_SET(ip, i_db[i], 0); bsize = blksize(fs, ip, i); - ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number); + ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number, + NULL); blocksreleased += btodb(bsize); } if (lastblock < 0) @@ -496,7 +499,7 @@ ffs_truncate(vp, length, flags, cred, td) */ bn += numfrags(fs, newspace); ffs_blkfree(ump, fs, ip->i_devvp, bn, - oldspace - newspace, ip->i_number); + oldspace - newspace, ip->i_number, NULL); blocksreleased += btodb(oldspace - newspace); } } @@ -638,7 +641,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp blocksreleased += blkcount; } ffs_blkfree(ip->i_ump, fs, ip->i_devvp, nb, fs->fs_bsize, - ip->i_number); + ip->i_number, NULL); blocksreleased += nblocks; } Index: /usr/src/sys/ufs/ffs/ffs_snapshot.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_snapshot.c (revision 200709) +++ /usr/src/sys/ufs/ffs/ffs_snapshot.c (working copy) @@ -582,7 +582,8 @@ loop: len = fragroundup(fs, blkoff(fs, xp->i_size)); if (len != 0 && len < fs->fs_bsize) { ffs_blkfree(ump, copy_fs, vp, - DIP(xp, i_db[loc]), len, xp->i_number); + DIP(xp, i_db[loc]), len, xp->i_number, + NULL); blkno = DIP(xp, i_db[loc]); DIP_SET(xp, i_db[loc], 0); } @@ -598,7 +599,7 @@ loop: DIP_SET(xp, i_db[loc], blkno); if (!error) error = ffs_freefile(ump, copy_fs, vp, xp->i_number, - xp->i_mode); + xp->i_mode, NULL); VOP_UNLOCK(xvp, 0); vdrop(xvp); if (error) { @@ -700,7 +701,7 @@ out1: copy_fs, vp, xp->i_number, - xp->i_mode); + xp->i_mode, NULL); } if (error) { fs->fs_snapinum[snaploc] = 0; @@ -1220,7 +1221,7 @@ 
mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, ex *ip->i_snapblklist++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); - ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum); + ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL); } return (0); } @@ -1500,7 +1501,7 @@ mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, ex *ip->i_snapblklist++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); - ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum); + ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL); } return (0); } Index: /usr/src/sys/ufs/ffs/fs.h =================================================================== --- /usr/src/sys/ufs/ffs/fs.h (revision 200709) +++ /usr/src/sys/ufs/ffs/fs.h (working copy) @@ -337,7 +337,9 @@ struct fs { int32_t fs_avgfilesize; /* expected average file size */ int32_t fs_avgfpdir; /* expected # of files per directory */ int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */ - int32_t fs_sparecon32[26]; /* reserved for future constants */ + int32_t fs_sujournal; /* SUJ journal file */ + int32_t fs_sujfree; /* SUJ free list */ + int32_t fs_sparecon32[24]; /* reserved for future constants */ int32_t fs_flags; /* see FS_ flags below */ int32_t fs_contigsumsize; /* size of cluster summary array */ int32_t fs_maxsymlinklen; /* max length of an internal symlink */ @@ -409,6 +411,7 @@ CTASSERT(sizeof(struct fs) == 1376); #define FS_MULTILABEL 0x20 /* file system is MAC multi-label */ #define FS_GJOURNAL 0x40 /* gjournaled file system */ #define FS_FLAGS_UPDATED 0x80 /* flags have been moved to new location */ +#define FS_SUJ 0x100 /* Filesystem using softupdate journal */ /* * Macros to access bits in the fs_active array. @@ -598,8 +601,32 @@ struct cg { ? 
(fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (size))))) - /* + * Indirect lbns are aligned on NDADDR addresses where single indirects + * are the negated address of the lowest lbn reachable, double indirects + * are this lbn - 1 and triple indirects are this lbn - 2. This yields + * an unusual bit order to determine level. + */ +static inline int +lbn_level(ufs_lbn_t lbn) +{ + if (lbn >= 0) + return 0; + switch (lbn & 0x3) { + case 0: + return (0); + case 1: + break; + case 2: + return (2); + case 3: + return (1); + default: + break; + } + return (-1); +} +/* * Number of inodes in a secondary storage block/fragment. */ #define INOPB(fs) ((fs)->fs_inopb) @@ -610,6 +637,66 @@ struct cg { */ #define NINDIR(fs) ((fs)->fs_nindir) +/* + * Softdep journal record format. + */ + +#define JOP_ADDREF 1 /* Add a reference to an inode. */ +#define JOP_REMREF 2 /* Remove a reference from an inode. */ +#define JOP_NEWBLK 3 /* Allocate a block. */ +#define JOP_FREEBLK 4 /* Free a block or a tree of blocks. */ + +#define JREC_SIZE 32 /* Record and segment header size. */ + +#define SUJ_MIN (1 * 1024 * 1024) /* Minimum journal size */ +#define SUJ_MAX (128 * SUJ_MIN) /* Maximum journal size */ + +/* + * Size of the segment record header. There is at most one for each disk + * block and at least one for each filesystem block in the journal. The + * segment header is followed by an array of records. 
+ */ +struct jsegrec { + uint64_t jsr_seq; /* Our sequence number */ + uint64_t jsr_oldest; /* Oldest valid sequence number */ + uint32_t jsr_cnt; /* Count of valid records */ + uint32_t jsr_crc; /* 32bit crc of the valid space */ + uint64_t jsr_unused; +}; + +struct jrefrec { + uint32_t jr_op; + ino_t jr_ino; + ino_t jr_parent; + int16_t jr_nlink; + uint16_t jr_mode; + off_t jr_diroff; + uint64_t jr_unused; +}; + +struct jblkrec { + uint32_t jb_op; + uint32_t jb_ino; + ufs2_daddr_t jb_blkno; + ufs_lbn_t jb_lbn; + uint16_t jb_frags; + uint16_t jb_oldfrags; + uint32_t jb_unused; +}; + +union jrec { + struct jsegrec rec_jsegrec; + struct jrefrec rec_jrefrec; + struct jblkrec rec_jblkrec; +}; + +#ifdef CTASSERT +CTASSERT(sizeof(struct jsegrec) == JREC_SIZE); +CTASSERT(sizeof(struct jrefrec) == JREC_SIZE); +CTASSERT(sizeof(struct jblkrec) == JREC_SIZE); +CTASSERT(sizeof(union jrec) == JREC_SIZE); +#endif + extern int inside[], around[]; extern u_char *fragtbl[]; -- Test scenario: /rw -t 2m -i 50 -h -v