GENERIC HEAD from 2011-05-14 05:43:33 UTC, r221878M, vmcore.140 KDB: debugger backends: ddb KDB: current backend: ddb 524288K of memory above 4GB ignored Copyright (c) 1992-2011 The FreeBSD Project. Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994 The Regents of the University of California. All rights reserved. FreeBSD is a registered trademark of The FreeBSD Foundation. FreeBSD 9.0-CURRENT #0 r221878M: Mon May 16 06:14:00 CEST 2011 pho@x4.osted.lan:/usr/src/sys/i386/compile/PHO i386 WARNING: WITNESS option enabled, expect reduced performance. WARNING: DIAGNOSTIC option enabled, expect reduced performance. CPU: AMD Phenom(tm) 9150e Quad-Core Processor (1800.02-MHz 686-class CPU) Origin = "AuthenticAMD" Id = 0x100f23 Family = 10 Model = 2 Stepping = 3 Features=0x178bfbff Features2=0x802009 AMD Features=0xee500800 AMD Features2=0x7ff TSC: P-state invariant real memory = 4294967296 (4096 MB) avail memory = 3536019456 (3372 MB) : Trying to mount root from ufs:/dev/ad4s1a [rw]... Setting hostuuid: 00000000-0000-0000-0000-00218515337d. Setting hostid: 0x6b64ac17. Starting ddb. Entropy harvesting: interrupts ethernet point_to_point kickstart. Starting file system checks: /dev/ad4s1a: FILE SYSTEM CLEAN; SKIPPING CHECKS /dev/ad4s1a: clean, 211869 free (1381 frags, 26311 blocks, 0.1% fragmentation) /dev/ad4s1f: FILE SYSTEM CLEAN; SKIPPING CHECKS /dev/ad4s1f: clean, 189553 free (4201 frags, 23169 blocks, 0.4% fragmentation) /dev/label/tmp: FILE SYSTEM CLEAN; SKIPPING CHECKS /dev/label/tmp: clean, 91337316 free (3140 frags, 11416772 blocks, 0.0% fragmentation) /dev/ad4s1d: FILE SYSTEM CLEAN; SKIPPING CHECKS /dev/ad4s1d: clean, 2497834 free (226146 frags, 283961 blocks, 2.2% fragmentation) /dev/ad4s1e: FILE SYSTEM CLEAN; SKIPPING CHECKS /dev/ad4s1e: clean, 11244636 free (56588 frags, 1398506 blocks, 0.2% fragmentation) Mounting local file systems:. Setting hostname: x4.osted.lan. re0: link state changed to DOWN Starting Network: lo0 re0 fwe0 fwip0. lo0: flags=8049 metric 0 mtu 16384 options=3 inet 127.0.0.1 netmask 0xff000000 inet6 ::1 prefixlen 128 inet6 fe80::1%lo0 prefixlen 64 scopeid 0xa nd6 options=21 re0: flags=8843 metric 0 mtu 1500 options=389b ether 00:21:85:15:33:7d inet 192.168.1.101 netmask 0xffffff00 broadcast 192.168.1.255 inet6 fe80::221:85ff:fe15:337d%re0 prefixlen 64 tentative scopeid 0x1 nd6 options=29 media: Ethernet autoselect (none) status: no carrier fwe0: flags=8802 metric 0 mtu 1500 options=8 ether 02:dc:10:62:ad:eb ch 1 dma -1 fwip0: flags=8802 metric 0 mtu 1500 lladdr 0.dc.10.0.1.62.ad.eb.a.2.ff.fe.0.0.0.0 Starting devd. Starting Network: usbus0. Starting Network: usbus1. Starting Network: usbus2. Starting Network: usbus3. Starting Network: usbus4. Starting Network: usbus5. Starting Network: fwe0. fwe0: flags=8802 metric 0 mtu 1500 options=8 ether 02:dc:10:62:ad:eb ch 1 dma -1 re0: link state changed to UP Starting Network: fwip0. fwip0: flags=8802 metric 0 mtu 1500 lladdr 0.dc.10.0.1.62.ad.eb.a.2.ff.fe.0.0.0.0 add net default: gateway 192.168.1.1 add net ::ffff:0.0.0.0: gateway ::1 add net ::0.0.0.0: gateway ::1 add net fe80::: gateway ::1 add net ff02::: gateway ::1 ELF ldconfig path: /lib /usr/lib /usr/lib/compat /usr/local/lib /usr/local/lib/compat/pkg /usr/local/kde4/lib /usr/local/lib /usr/local/lib/compat /usr/local/lib/compat/pkg /usr/local/lib/nss /usr/local/lib/qt4 /usr/local/lib/virtualbox /usr/local/lib/wine a.out ldconfig path: /usr/lib/aout /usr/lib/compat/aout Creating and/or trimming log files. Starting syslogd. No core dumps found. Additional ABI support: linux. Starting rpcbind. NFS access cache time=60 sysctl: unknown oid 'vfs.nfs.access_cache_timeout' /etc/rc: WARNING: failed to set access cache timeout lock order reversal: 1st 0xe071ce40 bufwait (bufwait) @ kern/vfs_bio.c:2658 2nd 0xc7504000 dirhash (dirhash) @ ufs/ufs/ufs_dirhash.c:284 KDB: stack backtrace: db_trace_self_wrapper(c0e89936,2e687361,38323a63,a0d34,c0998883,...) at db_trace_self_wrapper+0x26 kdb_backtrace(c09e0e4b,c0e8d253,c6d5e2a0,c6d61ec0,ef1b989c,...) at kdb_backtrace+0x2a _witness_debugger(c0e8d253,c7504000,c0ebc0ba,c6d61ec0,c0ebbd4c,...) at _witness_debugger+0x25 witness_checkorder(c7504000,9,c0ebbd43,11c,0,...) at witness_checkorder+0x839 _sx_xlock(c7504000,0,c0ebbd43,11c,e13bda60,...) at _sx_xlock+0x85 ufsdirhash_acquire(c74c8984,e13bda60,ef1b9964,c0be7f24,c742a658,...) at ufsdirhash_acquire+0x48 ufsdirhash_move(c742a658,e13bda60,1a60,1a50,ef1b9954,...) at ufsdirhash_move+0xf ufs_direnter(c74c8984,c7970414,ef1b99ec,ef1b9bd0,e071d460,...) at ufs_direnter+0x614 ufs_mkdir(ef1b9bf8,c0ed9cf5,0,0,ef1b9b3c,...) at ufs_mkdir+0x916 VOP_MKDIR_APV(c0fbb340,ef1b9bf8,ef1b9bd0,ef1b9b3c,0,...) at VOP_MKDIR_APV+0xc5 kern_mkdirat(c74ba8a0,ffffff9c,28404020,0,1c0,...) at kern_mkdirat+0x225 kern_mkdir(c74ba8a0,28404020,0,1c0,ef1b9c7c,...) at kern_mkdir+0x2e mkdir(c74ba8a0,ef1b9cec,ef1b9d28,c0e8ba9e,0,...) at mkdir+0x29 syscallenter(c74ba8a0,ef1b9ce4,ef1b9ce4,0,0,...) at syscallenter+0x263 syscall(ef1b9d28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (136, FreeBSD ELF32, mkdir), eip = 0x28172643, esp = 0xbfbfe8cc, ebp = 0xbfbfed78 --- Clearing /tmp (X related). Starting mountd. Starting nfsd. Recovering vi editor sessions:lock order reversal: 1st 0xc7a06c94 ufs (ufs) @ kern/vfs_lookup.c:501 2nd 0xe07249c0 bufwait (bufwait) @ ufs/ffs/ffs_softdep.c:12662 3rd 0xc7a06310 ufs (ufs) @ kern/vfs_subr.c:2134 KDB: stack backtrace: db_trace_self_wrapper(c0e89936,2e706564,32313a63,d323636,a,...) at db_trace_self_wrapper+0x26 kdb_backtrace(c09e0e4b,c0e8d26c,c6d5e2a0,c6d61e58,ef1ed32c,...) at kdb_backtrace+0x2a _witness_debugger(c0e8d26c,c7a06310,c0e7c750,c6d61e58,c0e94e7b,...) at _witness_debugger+0x25 witness_checkorder(c7a06310,9,c0e94e72,856,0,...) at witness_checkorder+0x839 __lockmgr_args(c7a06310,80100,c7a0637c,0,0,...) at __lockmgr_args+0x814 ffs_lock(ef1ed450,c09f23db,c0e941d7,80100,c7a062b8,...) at ffs_lock+0xa1 VOP_LOCK1_APV(c0fbb340,ef1ed450,c77cd0b0,c0fd6300,c7a062b8,...) at VOP_LOCK1_APV+0xb5 _vn_lock(c7a062b8,80100,c0e94e72,856,4,...) at _vn_lock+0x78 vget(c7a062b8,80100,c77cd000,50,0,...) at vget+0xbb vfs_hash_get(c7411b50,15ef44,80000,c77cd000,ef1ed5a8,...) at vfs_hash_get+0xed ffs_vgetf(c7411b50,15ef44,80000,ef1ed5a8,1,...) at ffs_vgetf+0x49 softdep_sync_metadata(c7a06c3c,0,c0ebb839,144,0,...) at softdep_sync_metadata+0x10e2 ffs_syncvnode(c7a06c3c,1,c7a06d34,c6d5e2a0,c115b4e0,...) at ffs_syncvnode+0x3e2 ffs_truncate(c7a06c3c,200,0,880,c7937400,...) at ffs_truncate+0x8bb ufs_direnter(c7a06c3c,c7a062b8,ef1ed914,ef1edba4,0,...) at ufs_direnter+0x924 ufs_makeinode(ef1edba4,c0fbb840,ef1edb00,ef1eda5c,c0cf5c55,...) at ufs_makeinode+0x5c5 ufs_create(ef1edb00,c0ed9ebb,0,0,ef1edb78,...) at ufs_create+0x30 VOP_CREATE_APV(c0fbb340,ef1edb00,ef1edba4,ef1eda98,0,...) at VOP_CREATE_APV+0xc5 vn_open_cred(ef1edb78,ef1edc2c,1b0,0,c7937400,...) at vn_open_cred+0x205 vn_open(ef1edb78,ef1edc2c,1b0,c74d5c40,0,...) at vn_open+0x3b kern_openat(c77cd000,ffffff9c,28839f60,0,a03,...) at kern_openat+0x12f kern_open(c77cd000,28839f60,0,a02,1b0,...) at kern_open+0x35 open(c77cd000,ef1edcec,ef1edd28,c0e8ba9e,0,...) at open+0x30 syscallenter(c77cd000,ef1edce4,ef1edce4,0,0,...) at syscallenter+0x263 syscall(ef1edd28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (5, FreeBSD ELF32, open), eip = 0x283d1683, esp = 0xbfbfc4fc, ebp = 0xbfbfc588 --- . Updating motd:. Starting ntpd. Starting default moused. Configuring syscons: keymap blanktime. Starting sshd. Starting cron. Local package initialization: backuplock order reversal: 1st 0xc74c8df0 ufs (ufs) @ kern/vfs_mount.c:1193 2nd 0xc721a1b4 devfs (devfs) @ ufs/ffs/ffs_softdep.c:1729 KDB: stack backtrace: db_trace_self_wrapper(c0e89936,37313a63,a0d3932,ef227900,c09f23db,...) at db_trace_self_wrapper+0x26 kdb_backtrace(c09e0e4b,c0e8d253,c6d61e58,c6d61d88,ef227a04,...) at kdb_backtrace+0x2a _witness_debugger(c0e8d253,c721a1b4,c0e74907,c6d61d88,c0eb8505,...) at _witness_debugger+0x25 witness_checkorder(c721a1b4,9,c0eb84fc,6c1,c721a220,...) at witness_checkorder+0x839 __lockmgr_args(c721a1b4,80400,c721a220,0,0,...) at __lockmgr_args+0x814 vop_stdlock(ef227b24,4,c0e83fe7,80400,c721a15c,...) at vop_stdlock+0x65 VOP_LOCK1_APV(c0f910e0,ef227b24,c118ef28,c0fd6300,c721a15c,...) at VOP_LOCK1_APV+0xb5 _vn_lock(c721a15c,80400,c0eb84fc,6c1,c774d2d4,...) at _vn_lock+0x78 softdep_flushworklist(c774d2d4,ef227bd0,c743d8a0,574,0,...) at softdep_flushworklist+0x47 ffs_sync(c774d2d4,1,ef227c14,4ee,0,...) at ffs_sync+0x2fd dounmount(c774d2d4,8080000,c743d8a0,473,5b85c5ea,...) at dounmount+0x447 unmount(c743d8a0,ef227cec,281766e5,1,0,...) at unmount+0x310 syscallenter(c743d8a0,ef227ce4,c0cd2d3d,c1010f70,0,...) at syscallenter+0x263 syscall(ef227d28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (22, FreeBSD ELF32, unmount), eip = 0x280dcd6b, esp = 0xbfbfe64c, ebp = 0xbfbfe718 --- fsck -y /tmp watchdogd. Starting inetd. Mon May 16 17:11:33 CEST 2011 FreeBSD/i386 (x4.osted.lan) (console) login: May 16 17:15:09 x4 su: pho to root on /dev/pts/0 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 : : softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 softdep_waitidle: Failed to flush worklist for 0xc808cb50 Stopping inetd. Waiting for PIDS: 2152. Shutting down local packages:. Stopping cron. Waiting for PIDS: 2100. Stopping sshd. Waiting for PIDS: 2080. Stopping watchdogd. Waiting for PIDS: 2125. Stopping moused. Waiting for PIDS: 2020, 2020. Stopping ntpd. Waiting for PIDS: 1960. Stopping nfsd. Waiting for PIDS: 1841 1842. Stopping mountd. Waiting for PIDS: 1832. Stopping rpcbind. Waiting for PIDS: 1733. Stopping devd. Waiting for PIDS: 1507. Writing entropy file:. Terminated . May 16 17:50:13 x4 syslogd: exiting on signal 15 Enter full pathname of shell or RETURN for /bin/sh: # mount /dev/ad4s1a on / (ufs, local) devfs on /dev (devfs, local, multilabel) /dev/ad4s1f on /home (ufs, local, soft-updates) /dev/ad4s1d on /usr (ufs, local, soft-updates) /dev/ad4s1e on /var (ufs, local, soft-updates) procfs on /proc (procfs, local) linprocfs on /usr/compat/linux/proc (linprocfs, local) /dev/label/tmp on /tmp (ufs, NFS exported, local, soft-updates) /dev/md5a on /mnt (ufs, NFS exported, local, soft-updates) # umount /mnt softdep_waitidle: Failed to flush worklist for 0xc808cb50 umount: unmount of /mnt failed: Device busy # umount -f /mnt softdep_waitidle: Failed to flush worklist for 0xc808cb50 umount: unmount of /mnt failed: Device busy # ~KDB: enter: Line break on console [ thread pid 11 tid 100004 ] Stopped at kdb_enter+0x3a: movl $0,kdb_why db> show mount 0xc7410b50 /dev/ad4s1a on / (ufs) 0xc7411000 devfs on /dev (devfs) 0xc774d5a8 /dev/ad4s1f on /home (ufs) 0xc774d000 /dev/ad4s1d on /usr (ufs) 0xc7411b50 /dev/ad4s1e on /var (ufs) 0xc741187c procfs on /proc (procfs) 0xc74115a8 linprocfs on /usr/compat/linux/proc (linprocfs) 0xc7753000 /dev/label/tmp on /tmp (ufs) 0xc808cb50 /dev/md5a on /mnt (ufs) More info: show mount db> show mount 0xc808cb50 0xc808cb50 /dev/md5a on /mnt (ufs) mnt_flag = SOFTDEP, EXPORTED, DEFEXPORTED, LOCAL mnt_kern_flag = SOFTDEP, EXTENDED_SHARED, MPSAFE, LOOKUP_SHARED mnt_opt = fstype, fspath, from, errmsg mnt_stat = { version=537068824 type=2 flags=0x0000000000201300 bsize=2048 iosize=16384 blocks=1013011 bfree=1013009 bavail=931969 files=282622 ffree=282620 syncwrites=0 asyncwrites=0 syncreads=0 asyncreads=0 namemax=255 owner=0 fsid=[1305558915, 272911100] } mnt_cred = { uid=0 ruid=0 } mnt_ref = 1 mnt_gen = 1 mnt_nvnodelistsize = 1 mnt_writeopcount = 0 mnt_noasync = 1 mnt_maxsymlinklen = 120 mnt_iosize_max = 131072 mnt_hashseed = 3511406619 mnt_secondary_writes = 0 mnt_secondary_accwrites = 44811 mnt_gjprovider = NULL vnode 0xc85fa828: tag syncer, type VNON usecount 1, writecount 0, refcount 1 mountedhere 0 flags () lock type syncer: UNLOCKED #0 0xc0990a4e at __lockmgr_args+0xbfe #1 0xc0a300c5 at vop_stdlock+0x65 #2 0xc0cf4c85 at VOP_LOCK1_APV+0xb5 #3 0xc0a50048 at _vn_lock+0x78 #4 0xc0a454ac at vfs_allocate_syncvnode+0x6c #5 0xc0a3a434 at dounmount+0x544 #6 0xc0a3a900 at unmount+0x310 #7 0xc09eada3 at syscallenter+0x263 #8 0xc0cd24df at syscall+0x4f #9 0xc0cbb9a1 at Xint0x80_syscall+0x21 db> run pho db:0:pho> bt Tracing pid 11 tid 100004 td 0xc6dbf5c0 kdb_enter(c0d0f935,c0e4fcf0,0,c716a080,0,...) at kdb_enter+0x3a uart_intr(c716a000,0,c11b3e78,c6b2dbe8,0,...) at uart_intr+0x126 intr_event_handle(c6e04d80,c6b2dc10,c6b2dc34,0,c6fe0600,...) at intr_event_handle+0x76 intr_execute_handlers(c6e030d0,c6b2dc10,2,c6b2dc50,c0cbbd05,...) at intr_execute_handlers+0x49 lapic_handle_intr(30,c6b2dc10) at lapic_handle_intr+0x36 Xapic_isr1() at Xapic_isr1+0x35 --- interrupt, eip = 0xc0cb7305, esp = 0xc6b2dc50, ebp = 0xc6b2dc50 --- acpi_cpu_c1(c6b2dc6c,c0fd79d0,2,2f4,945,...) at acpi_cpu_c1+0x5 acpi_cpu_idle(0,c6b2dca0,c0cc5520,0,ffffffff,...) at acpi_cpu_idle+0x122 cpu_idle_acpi(0,ffffffff,c1020200,2,c6b2dce8,...) at cpu_idle_acpi+0x2f cpu_idle(0,c6b2dcc4,c0e87a2b,a05,c6dbf5c0,...) at cpu_idle+0x90 sched_idletd(0,c6b2dd28,c0e8034c,390,c6dbd834,...) at sched_idletd+0x263 fork_exit(c09ce540,0,c6b2dd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b2dd60, ebp = 0 --- db:0:bt> show allpcpu Current CPU: 2 cpuid = 0 dynamic pcpu = 0x44e600 curthread = 0xc6dbf000: pid 11 "idle: cpu0" curpcb = 0xc6b33d80 fpcurthread = none idlethread = 0xc6dbf000: tid 100006 "idle: cpu0" APIC ID = 0 currentldt = 0x50 spin locks held: cpuid = 1 dynamic pcpu = 0x5b43600 curthread = 0xc6dbf2e0: pid 11 "idle: cpu1" curpcb = 0xc6b30d80 fpcurthread = none idlethread = 0xc6dbf2e0: tid 100005 "idle: cpu1" APIC ID = 1 currentldt = 0x50 spin locks held: cpuid = 2 dynamic pcpu = 0x5b46600 curthread = 0xc6dbf5c0: pid 11 "idle: cpu2" curpcb = 0xc6b2dd80 fpcurthread = none idlethread = 0xc6dbf5c0: tid 100004 "idle: cpu2" APIC ID = 2 currentldt = 0x50 spin locks held: cpuid = 3 dynamic pcpu = 0x5b49600 curthread = 0xc6dbf8a0: pid 11 "idle: cpu3" curpcb = 0xc6b2ad80 fpcurthread = none idlethread = 0xc6dbf8a0: tid 100003 "idle: cpu3" APIC ID = 3 currentldt = 0x50 spin locks held: db:0:allpcpu> show alllocks db:0:alllocks> show lockedvnods Locked vnodes db:0:lockedvnods> show mount 0xc7410b50 /dev/ad4s1a on / (ufs) 0xc7411000 devfs on /dev (devfs) 0xc774d5a8 /dev/ad4s1f on /home (ufs) 0xc774d000 /dev/ad4s1d on /usr (ufs) 0xc7411b50 /dev/ad4s1e on /var (ufs) 0xc741187c procfs on /proc (procfs) 0xc74115a8 linprocfs on /usr/compat/linux/proc (linprocfs) 0xc7753000 /dev/label/tmp on /tmp (ufs) 0xc808cb50 /dev/md5a on /mnt (ufs) More info: show mount db:0:mount> ps pid ppid pgrp uid state wmesg wchan cmd 95950 1 95950 0 Ss+ ttyin 0xc6f52a70 sh 9384 0 0 0 DL mdwait 0xc8085000 [md5] 18 0 0 0 DL sdflush 0xc118ef80 [softdepflush] 17 0 0 0 DL syncer 0xc1188a14 [syncer] 16 0 0 0 DL vlruwt 0xc73a4834 [vnlru] 9 0 0 0 DL psleep 0xc11886a8 [bufdaemon] 8 0 0 0 DL pgzero 0xc1190ddc [pagezero] 7 0 0 0 DL psleep 0xc11909c8 [vmdaemon] 6 0 0 0 DL psleep 0xc1190990 [pagedaemon] 5 0 0 0 DL ccb_scan 0xc0fd7754 [xpt_thrd] 4 0 0 0 DL waiting_ 0xc118a4d8 [sctp_iterator] 3 0 0 0 DL - 0xc6fe143c [fdc0] 2 0 0 0 SL - 0xc706b000 [fw0_probe] 15 0 0 0 DL (threaded) [usb] 100057 D - 0xc704bd34 [usbus5] 100056 D - 0xc704bd04 [usbus5] 100055 D - 0xc704bcd4 [usbus5] 100054 D - 0xc704bca4 [usbus5] 100052 D - 0xc7040b5c [usbus4] 100051 D - 0xc7040b2c [usbus4] 100050 D - 0xc7040afc [usbus4] 100049 D - 0xc7040acc [usbus4] 100048 D - 0xc7039b5c [usbus3] 100047 D - 0xc7039b2c [usbus3] 100046 D - 0xc7039afc [usbus3] 100045 D - 0xc7039acc [usbus3] 100044 D - 0xc702fb5c [usbus2] 100043 D - 0xc702fb2c [usbus2] 100042 D - 0xc702fafc [usbus2] 100041 D - 0xc702facc [usbus2] 100039 D - 0xc7029b5c [usbus1] 100038 D - 0xc7029b2c [usbus1] 100037 D - 0xc7029afc [usbus1] 100036 D - 0xc7029acc [usbus1] 100034 D - 0xc7019b5c [usbus0] 100033 D - 0xc7019b2c [usbus0] 100032 D - 0xc7019afc [usbus0] 100031 D - 0xc7019acc [usbus0] 14 0 0 0 DL - 0xc1011684 [yarrow] 13 0 0 0 DL (threaded) [geom] 100015 D - 0xc100f2c8 [g_down] 100014 D - 0xc100f2c4 [g_up] 100013 D - 0xc100f2bc [g_event] 12 0 0 0 WL (threaded) [intr] 100065 I [irq12: psm0] 100064 I [irq1: atkbd0] 100062 I [swi0: uart] 100059 I [irq20: fwohci0] 100058 I [irq14: ata0] 100053 I [irq19: ehci0] 100040 I [irq18: ohci2 ohci4] 100035 I [irq17: ohci1 ohci3] 100030 I [irq16: ohci0] 100029 I [irq22: ahci0] 100028 I [irq256: re0] 100027 I [swi2: cambio] 100021 I [swi6: task queue] 100020 I [swi6: Giant taskq] 100018 I [swi5: +] 100012 I [swi4: clock] 100011 I [swi4: clock] 100010 I [swi4: clock] 100009 I [swi4: clock] 100008 I [swi3: vm] 100007 I [swi1: netisr 0] 11 0 0 0 RL (threaded) [idle] 100006 Run CPU 0 [idle: cpu0] 100005 Run CPU 1 [idle: cpu1] 100004 Run CPU 2 [idle: cpu2] 100003 Run CPU 3 [idle: cpu3] 1 0 1 0 SLs wait 0xc6dbdaf0 [init] 10 0 0 0 DL audit_wo 0xc118e700 [audit] 0 0 0 0 DLs (threaded) [kernel] 100076 D - 0xc71853c0 [mca taskq] 100066 D - 0xc1011684 [deadlkres] 100060 D - 0xc7073900 [fw0_taskq] 100026 D - 0xc6d92340 [acpi_task_2] 100025 D - 0xc6d92340 [acpi_task_1] 100024 D - 0xc6d92340 [acpi_task_0] 100023 D - 0xc6d92380 [kqueue taskq] 100022 D - 0xc6d923c0 [ffs_trim taskq] 100019 D - 0xc6d92500 [thread taskq] 100016 D - 0xc6d92a80 [firmware taskq] 100000 D sched 0xc100f3a0 [swapper] db:0:ps> allt Tracing command sh pid 95950 tid 100106 td 0xc743ab80 sched_switch(c743ab80,0,104,191,6087050c,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c743ab80,0,c0e8b160,1a7,0,...) at sleepq_switch+0x162 sleepq_catch_signals(c099890a,c6f52a04,0,c0e83fe7,c743ab80,...) at sleepq_catch_signals+0xf9 sleepq_wait_sig(c6f52a70,0,ef237ad8,101,0,...) at sleepq_wait_sig+0x17 _cv_wait_sig(c6f52a70,c6f52a04,c0e8f7d0,514,0,...) at _cv_wait_sig+0x243 tty_wait(c6f52a00,c6f52a70,ef237c28,1,0,...) at tty_wait+0x71 ttydisc_read(c6f52a00,ef237c28,0,9f,0,...) at ttydisc_read+0x22c ttydev_read(c6da2800,ef237c28,0,0,1,...) at ttydev_read+0xaa devfs_read_f(c78d3c40,ef237c28,c6d9ee80,0,c743ab80,...) at devfs_read_f+0x7e dofileread(ef237c28,ffffffff,ffffffff,0,c78d3c40,...) at dofileread+0x9e kern_readv(c743ab80,0,ef237c28,ef237c48,1,...) at kern_readv+0x58 read(c743ab80,ef237cec,ef237d28,c0e8ba9e,0,...) at read+0x4f syscallenter(c743ab80,ef237ce4,ef237ce4,0,0,...) at syscallenter+0x263 syscall(ef237d28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (3, FreeBSD ELF32, read), eip = 0x281fa623, esp = 0xbfbfecac, ebp = 0xbfbfecd8 --- Tracing command md5 pid 9384 tid 100172 td 0xc805e5c0 sched_switch(c805e5c0,0,104,191,cde92811,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,5c,...) at mi_switch+0x219 sleepq_switch(c805e5c0,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c8085000,5c,c0e31d51,0,0,...) at sleepq_wait+0x63 _sleep(c8085000,c8085020,25c,c0e31d51,0,...) at _sleep+0x372 md_kthread(c8085000,ef385d28,c0e8034c,390,c805b000,...) at md_kthread+0x11a fork_exit(c07276f0,c8085000,ef385d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xef385d60, ebp = 0 --- Tracing command softdepflush pid 18 tid 100075 td 0xc7045b80 sched_switch(c7045b80,0,104,191,940a05ea,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,54,...) at mi_switch+0x219 sleepq_switch(c7045b80,0,c0e8b160,28b,0,...) at sleepq_switch+0x162 sleepq_timedwait(c118ef80,54,c0ebaa1e,0,0,...) at sleepq_timedwait+0x6b _sleep(c118ef80,c118ef28,54,c0ebaa1e,3e8,...) at _sleep+0x342 softdep_flush(0,ecf96d28,c0e8034c,390,c73a42bc,...) at softdep_flush+0x25a fork_exit(c0bd8550,0,ecf96d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf96d60, ebp = 0 --- Tracing command syncer pid 17 tid 100074 td 0xc7170000 sched_switch(c7170000,0,104,191,a0ce5355,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c7170000,0,c0e8b160,28b,c7170000,...) at sleepq_switch+0x162 sleepq_timedwait(c1188a14,0,ecf93c78,1,0,...) at sleepq_timedwait+0x6b _cv_timedwait(c1188a14,c1188a00,3e8,76a,4e20,...) at _cv_timedwait+0x252 sched_sync(0,ecf93d28,c0e8034c,390,c73a4578,...) at sched_sync+0x595 fork_exit(c0a42990,0,ecf93d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf93d60, ebp = 0 --- Tracing command vnlru pid 16 tid 100073 td 0xc71702e0 sched_switch(c71702e0,0,104,191,9bfa3c88,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,60,...) at mi_switch+0x219 sleepq_switch(c71702e0,0,c0e8b160,28b,0,...) at sleepq_switch+0x162 sleepq_timedwait(c73a4834,60,c0e95de4,0,0,...) at sleepq_timedwait+0x6b _sleep(c73a4834,c11889d4,260,c0e95de4,3e8,...) at _sleep+0x342 vnlru_proc(0,ecf90d28,c0e8034c,390,c73a4834,...) at vnlru_proc+0xe7 fork_exit(c0a44a00,0,ecf90d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf90d60, ebp = 0 --- Tracing command bufdaemon pid 9 tid 100072 td 0xc71705c0 sched_switch(c71705c0,0,104,191,9d286100,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,54,...) at mi_switch+0x219 sleepq_switch(c71705c0,0,c0e8b160,28b,0,...) at sleepq_switch+0x162 sleepq_timedwait(c11886a8,54,c0e92e12,0,0,...) at sleepq_timedwait+0x6b _sleep(c11886a8,c11886b0,54,c0e92e12,3e8,...) at _sleep+0x342 buf_daemon(0,ecf8dd28,c0e8034c,390,c73a4af0,...) at buf_daemon+0x175 fork_exit(c0a2a100,0,ecf8dd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf8dd60, ebp = 0 --- Tracing command pagezero pid 8 tid 100071 td 0xc71708a0 sched_switch(c71708a0,0,104,191,b2d09b5a,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c71708a0,0,c0e8b160,28b,0,...) at sleepq_switch+0x162 sleepq_timedwait(c1190ddc,0,c0ec0e48,0,0,...) at sleepq_timedwait+0x6b _sleep(c1190ddc,c118f880,0,c0ec0e48,493e0,...) at _sleep+0x342 vm_pagezero(0,ecf8ad28,c0e8034c,390,c73a6000,...) at vm_pagezero+0xdc fork_exit(c0c1acf0,0,ecf8ad28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf8ad60, ebp = 0 --- Tracing command vmdaemon pid 7 tid 100070 td 0xc7170b80 sched_switch(c7170b80,0,104,191,9463aa95,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,74,...) at mi_switch+0x219 sleepq_switch(c7170b80,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c11909c8,74,c0e92e12,0,0,...) at sleepq_wait+0x63 _sleep(c11909c8,c11909cc,74,c0e92e12,0,...) at _sleep+0x372 vm_daemon(0,ecf87d28,c0e8034c,390,c6dbe2bc,...) at vm_daemon+0x59 fork_exit(c0c148b0,0,ecf87d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf87d60, ebp = 0 --- Tracing command pagedaemon pid 6 tid 100069 td 0xc7171000 sched_switch(c7171000,0,104,191,c452d1da,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,54,...) at mi_switch+0x219 sleepq_switch(c7171000,0,c0e8b160,28b,0,...) at sleepq_switch+0x162 sleepq_timedwait(c1190990,54,c0e92e12,0,0,...) at sleepq_timedwait+0x6b _sleep(c1190990,c118f880,54,c0e92e12,1388,...) at _sleep+0x342 vm_pageout(0,ecf84d28,c0e8034c,390,c6dbe578,...) at vm_pageout+0x2c7 fork_exit(c0c158d0,0,ecf84d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf84d60, ebp = 0 --- Tracing command xpt_thrd pid 5 tid 100068 td 0xc71712e0 sched_switch(c71712e0,0,104,191,903efa7d,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,5c,...) at mi_switch+0x219 sleepq_switch(c71712e0,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c0fd7754,5c,c0cf9c52,0,0,...) at sleepq_wait+0x63 _sleep(c0fd7754,c0fd7798,5c,c0cf9c52,0,...) at _sleep+0x372 xpt_scanner_thread(0,ecf81d28,c0e8034c,390,c6dbe834,...) at xpt_scanner_thread+0x47 fork_exit(c048bb70,0,ecf81d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf81d60, ebp = 0 --- Tracing command sctp_iterator pid 4 tid 100067 td 0xc71715c0 sched_switch(c71715c0,0,104,191,65e6de42,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c71715c0,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c118a4d8,0,c0ea8dbf,0,0,...) at sleepq_wait+0x63 _sleep(c118a4d8,c118a4a0,0,c0ea8dbf,0,...) at _sleep+0x372 sctp_iterator_thread(0,ecf7ed28,c0e8034c,390,c6dbeaf0,...) at sctp_iterator_thread+0x5c fork_exit(c0ad3ae0,0,ecf7ed28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf7ed60, ebp = 0 --- Tracing command fdc0 pid 3 tid 100063 td 0xc70362e0 sched_switch(c70362e0,0,104,191,a0ce6395,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,5c,...) at mi_switch+0x219 sleepq_switch(c70362e0,0,c0e8b160,28b,0,...) at sleepq_switch+0x162 sleepq_timedwait(c6fe143c,5c,c0e78e72,0,0,...) at sleepq_timedwait+0x6b _sleep(c6fe143c,c6fe14f0,5c,c0e78e72,3e8,...) at _sleep+0x342 fdc_thread(c6fe1400,ecf72d28,c0e8034c,390,c701e000,...) at fdc_thread+0x27d fork_exit(c0c99e60,c6fe1400,ecf72d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf72d60, ebp = 0 --- Tracing command fw0_probe pid 2 tid 100061 td 0xc70368a0 sched_switch(c70368a0,0,104,191,9070175a,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,6c,...) at mi_switch+0x219 sleepq_switch(c70368a0,0,c0e8b160,1a7,6c,...) at sleepq_switch+0x162 sleepq_catch_signals(c0e8b160,160,0,100,100,...) at sleepq_catch_signals+0xf9 sleepq_wait_sig(c706b000,6c,c0e78e72,100,0,...) at sleepq_wait_sig+0x17 _sleep(c706b000,c706f488,16c,c0e78e72,0,...) at _sleep+0x35c fw_bus_probe_thread(c706b000,ecf62d28,c0e8034c,390,c701e2bc,...) at fw_bus_probe_thread+0xa08 fork_exit(c06aaa90,c706b000,ecf62d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf62d60, ebp = 0 --- Tracing command usb pid 15 tid 100057 td 0xc70435c0 sched_switch(c70435c0,0,104,191,7da01d4f,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70435c0,0,c0e8b160,268,c70435c0,...) at sleepq_switch+0x162 sleepq_wait(c704bd34,0,ecf4fcac,1,0,...) at sleepq_wait+0x63 _cv_wait(c704bd34,c704bdd4,c0e5751e,6b,c704bd3c,...) at _cv_wait+0x243 usb_process(c704bd2c,ecf4fd28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c704bd2c,ecf4fd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf4fd60, ebp = 0 --- Tracing command usb pid 15 tid 100056 td 0xc70438a0 sched_switch(c70438a0,0,104,191,c2d1ebf5,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70438a0,0,c0e8b160,268,c70438a0,...) at sleepq_switch+0x162 sleepq_wait(c704bd04,0,ecf4ccac,1,0,...) at sleepq_wait+0x63 _cv_wait(c704bd04,c704bdd4,c0e5751e,6b,c704bd0c,...) at _cv_wait+0x243 usb_process(c704bcfc,ecf4cd28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c704bcfc,ecf4cd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf4cd60, ebp = 0 --- Tracing command usb pid 15 tid 100055 td 0xc7043b80 sched_switch(c7043b80,0,104,191,7d36fb84,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c7043b80,0,c0e8b160,268,c7043b80,...) at sleepq_switch+0x162 sleepq_wait(c704bcd4,0,ecf49cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c704bcd4,c704bdd4,c0e5751e,6b,c704bcdc,...) at _cv_wait+0x243 usb_process(c704bccc,ecf49d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c704bccc,ecf49d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf49d60, ebp = 0 --- Tracing command usb pid 15 tid 100054 td 0xc7045000 sched_switch(c7045000,0,104,191,7d36e9fa,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c7045000,0,c0e8b160,268,c7045000,...) at sleepq_switch+0x162 sleepq_wait(c704bca4,0,ecf46cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c704bca4,c704bdd4,c0e5751e,6b,c704bcac,...) at _cv_wait+0x243 usb_process(c704bc9c,ecf46d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c704bc9c,ecf46d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf46d60, ebp = 0 --- Tracing command usb pid 15 tid 100052 td 0xc70455c0 sched_switch(c70455c0,0,104,191,7d36d4fd,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70455c0,0,c0e8b160,268,c70455c0,...) at sleepq_switch+0x162 sleepq_wait(c7040b5c,0,ecf3dcac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7040b5c,c7040bfc,c0e5751e,6b,c7040b64,...) at _cv_wait+0x243 usb_process(c7040b54,ecf3dd28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7040b54,ecf3dd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf3dd60, ebp = 0 --- Tracing command usb pid 15 tid 100051 td 0xc7034000 sched_switch(c7034000,0,104,191,ba050dea,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c7034000,0,c0e8b160,268,c7034000,...) at sleepq_switch+0x162 sleepq_wait(c7040b2c,0,ecf3acac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7040b2c,c7040bfc,c0e5751e,6b,c7040b34,...) at _cv_wait+0x243 usb_process(c7040b24,ecf3ad28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7040b24,ecf3ad28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf3ad60, ebp = 0 --- Tracing command usb pid 15 tid 100050 td 0xc70342e0 sched_switch(c70342e0,0,104,191,7ccfd7d6,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70342e0,0,c0e8b160,268,c70342e0,...) at sleepq_switch+0x162 sleepq_wait(c7040afc,0,ecf37cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7040afc,c7040bfc,c0e5751e,6b,c7040b04,...) at _cv_wait+0x243 usb_process(c7040af4,ecf37d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7040af4,ecf37d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf37d60, ebp = 0 --- Tracing command usb pid 15 tid 100049 td 0xc70345c0 sched_switch(c70345c0,0,104,191,7ccfc66f,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70345c0,0,c0e8b160,268,c70345c0,...) at sleepq_switch+0x162 sleepq_wait(c7040acc,0,ecf34cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7040acc,c7040bfc,c0e5751e,6b,c7040ad4,...) at _cv_wait+0x243 usb_process(c7040ac4,ecf34d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7040ac4,ecf34d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf34d60, ebp = 0 --- Tracing command usb pid 15 tid 100048 td 0xc70348a0 sched_switch(c70348a0,0,104,191,7ccfae77,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70348a0,0,c0e8b160,268,c70348a0,...) at sleepq_switch+0x162 sleepq_wait(c7039b5c,0,e0bffcac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7039b5c,c7039bfc,c0e5751e,6b,c7039b64,...) at _cv_wait+0x243 usb_process(c7039b54,e0bffd28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7039b54,e0bffd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bffd60, ebp = 0 --- Tracing command usb pid 15 tid 100047 td 0xc7034b80 sched_switch(c7034b80,0,104,191,b78de41c,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c7034b80,0,c0e8b160,268,c7034b80,...) at sleepq_switch+0x162 sleepq_wait(c7039b2c,0,e0bfccac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7039b2c,c7039bfc,c0e5751e,6b,c7039b34,...) at _cv_wait+0x243 usb_process(c7039b24,e0bfcd28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7039b24,e0bfcd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bfcd60, ebp = 0 --- Tracing command usb pid 15 tid 100046 td 0xc7035000 sched_switch(c7035000,0,104,191,76ddbed7,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c7035000,0,c0e8b160,268,c7035000,...) at sleepq_switch+0x162 sleepq_wait(c7039afc,0,e0bf9cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7039afc,c7039bfc,c0e5751e,6b,c7039b04,...) at _cv_wait+0x243 usb_process(c7039af4,e0bf9d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7039af4,e0bf9d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bf9d60, ebp = 0 --- Tracing command usb pid 15 tid 100045 td 0xc70352e0 sched_switch(c70352e0,0,104,191,76ddadeb,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70352e0,0,c0e8b160,268,c70352e0,...) at sleepq_switch+0x162 sleepq_wait(c7039acc,0,e0bf6cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7039acc,c7039bfc,c0e5751e,6b,c7039ad4,...) at _cv_wait+0x243 usb_process(c7039ac4,e0bf6d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7039ac4,e0bf6d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bf6d60, ebp = 0 --- Tracing command usb pid 15 tid 100044 td 0xc70355c0 sched_switch(c70355c0,0,104,191,76dd9915,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70355c0,0,c0e8b160,268,c70355c0,...) at sleepq_switch+0x162 sleepq_wait(c702fb5c,0,e0bf2cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c702fb5c,c702fbfc,c0e5751e,6b,c702fb64,...) at _cv_wait+0x243 usb_process(c702fb54,e0bf2d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c702fb54,e0bf2d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bf2d60, ebp = 0 --- Tracing command usb pid 15 tid 100043 td 0xc70358a0 sched_switch(c70358a0,0,104,191,b78dca17,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70358a0,0,c0e8b160,268,c70358a0,...) at sleepq_switch+0x162 sleepq_wait(c702fb2c,0,e0befcac,1,0,...) at sleepq_wait+0x63 _cv_wait(c702fb2c,c702fbfc,c0e5751e,6b,c702fb34,...) at _cv_wait+0x243 usb_process(c702fb24,e0befd28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c702fb24,e0befd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0befd60, ebp = 0 --- Tracing command usb pid 15 tid 100042 td 0xc7035b80 sched_switch(c7035b80,0,104,191,76769acd,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c7035b80,0,c0e8b160,268,c7035b80,...) at sleepq_switch+0x162 sleepq_wait(c702fafc,0,e0beccac,1,0,...) at sleepq_wait+0x63 _cv_wait(c702fafc,c702fbfc,c0e5751e,6b,c702fb04,...) at _cv_wait+0x243 usb_process(c702faf4,e0becd28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c702faf4,e0becd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0becd60, ebp = 0 --- Tracing command usb pid 15 tid 100041 td 0xc7036000 sched_switch(c7036000,0,104,191,767688df,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c7036000,0,c0e8b160,268,c7036000,...) at sleepq_switch+0x162 sleepq_wait(c702facc,0,e0be9cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c702facc,c702fbfc,c0e5751e,6b,c702fad4,...) at _cv_wait+0x243 usb_process(c702fac4,e0be9d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c702fac4,e0be9d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0be9d60, ebp = 0 --- Tracing command usb pid 15 tid 100039 td 0xc701f2e0 sched_switch(c701f2e0,0,104,191,76767628,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c701f2e0,0,c0e8b160,268,c701f2e0,...) at sleepq_switch+0x162 sleepq_wait(c7029b5c,0,e0be2cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7029b5c,c7029bfc,c0e5751e,6b,c7029b64,...) at _cv_wait+0x243 usb_process(c7029b54,e0be2d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7029b54,e0be2d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0be2d60, ebp = 0 --- Tracing command usb pid 15 tid 100038 td 0xc701f5c0 sched_switch(c701f5c0,0,104,191,b78d874e,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c701f5c0,0,c0e8b160,268,c701f5c0,...) at sleepq_switch+0x162 sleepq_wait(c7029b2c,0,e0bdfcac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7029b2c,c7029bfc,c0e5751e,6b,c7029b34,...) at _cv_wait+0x243 usb_process(c7029b24,e0bdfd28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7029b24,e0bdfd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bdfd60, ebp = 0 --- Tracing command usb pid 15 tid 100037 td 0xc701f8a0 sched_switch(c701f8a0,0,104,191,7610404c,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c701f8a0,0,c0e8b160,268,c701f8a0,...) at sleepq_switch+0x162 sleepq_wait(c7029afc,0,e0bdccac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7029afc,c7029bfc,c0e5751e,6b,c7029b04,...) at _cv_wait+0x243 usb_process(c7029af4,e0bdcd28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7029af4,e0bdcd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bdcd60, ebp = 0 --- Tracing command usb pid 15 tid 100036 td 0xc701fb80 sched_switch(c701fb80,0,104,191,76102d19,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c701fb80,0,c0e8b160,268,c701fb80,...) at sleepq_switch+0x162 sleepq_wait(c7029acc,0,e0bd9cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7029acc,c7029bfc,c0e5751e,6b,c7029ad4,...) at _cv_wait+0x243 usb_process(c7029ac4,e0bd9d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7029ac4,e0bd9d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bd9d60, ebp = 0 --- Tracing command usb pid 15 tid 100034 td 0xc70222e0 sched_switch(c70222e0,0,104,191,7610162a,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70222e0,0,c0e8b160,268,c70222e0,...) at sleepq_switch+0x162 sleepq_wait(c7019b5c,0,e0bd2cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7019b5c,c7019bfc,c0e5751e,6b,c7019b64,...) at _cv_wait+0x243 usb_process(c7019b54,e0bd2d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7019b54,e0bd2d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bd2d60, ebp = 0 --- Tracing command usb pid 15 tid 100033 td 0xc70225c0 sched_switch(c70225c0,0,104,191,b1f92408,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70225c0,0,c0e8b160,268,c70225c0,...) at sleepq_switch+0x162 sleepq_wait(c7019b2c,0,e0bcfcac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7019b2c,c7019bfc,c0e5751e,6b,c7019b34,...) at _cv_wait+0x243 usb_process(c7019b24,e0bcfd28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7019b24,e0bcfd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bcfd60, ebp = 0 --- Tracing command usb pid 15 tid 100032 td 0xc70228a0 sched_switch(c70228a0,0,104,191,75a92550,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70228a0,0,c0e8b160,268,c70228a0,...) at sleepq_switch+0x162 sleepq_wait(c7019afc,0,e0bcccac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7019afc,c7019bfc,c0e5751e,6b,c7019b04,...) at _cv_wait+0x243 usb_process(c7019af4,e0bccd28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7019af4,e0bccd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bccd60, ebp = 0 --- Tracing command usb pid 15 tid 100031 td 0xc7022b80 sched_switch(c7022b80,0,104,191,75a91177,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c7022b80,0,c0e8b160,268,c7022b80,...) at sleepq_switch+0x162 sleepq_wait(c7019acc,0,e0bc9cac,1,0,...) at sleepq_wait+0x63 _cv_wait(c7019acc,c7019bfc,c0e5751e,6b,c7019ad4,...) at _cv_wait+0x243 usb_process(c7019ac4,e0bc9d28,c0e8034c,390,c701e578,...) at usb_process+0x193 fork_exit(c0850390,c7019ac4,e0bc9d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bc9d60, ebp = 0 --- Tracing command yarrow pid 14 tid 100017 td 0xc6ecb5c0 sched_switch(c6ecb5c0,0,104,191,ce677dd0,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c6ecb5c0,0,c0e8b160,28b,2,...) at sleepq_switch+0x162 sleepq_timedwait(c1011684,0,c0e78e72,2,0,...) at sleepq_timedwait+0x6b _sleep(c1011684,0,0,c0e78e72,64,...) at _sleep+0x342 pause(c0e78e72,64,c0e45f2e,111,0,...) at pause+0x47 random_kthread(0,c6b55d28,c0e8034c,390,c6dbd000,...) at random_kthread+0x1ef fork_exit(c07bb4a0,0,c6b55d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b55d60, ebp = 0 --- Tracing command geom pid 13 tid 100015 td 0xc6ecbb80 sched_switch(c6ecbb80,0,104,191,5d0e6dd7,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,5c,...) at mi_switch+0x219 sleepq_switch(c6ecbb80,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c100f2c8,5c,c0e78e72,0,0,...) at sleepq_wait+0x63 _sleep(c100f2c8,c100f228,25c,c0e78e72,0,...) at _sleep+0x372 g_io_schedule_down(c6ecbb80,0,c0e7a9c5,6c,c6b4fd14,...) at g_io_schedule_down+0x5b g_down_procbody(0,c6b4fd28,c0e8034c,390,c6dbd2bc,...) at g_down_procbody+0x9d fork_exit(c093e770,0,c6b4fd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b4fd60, ebp = 0 --- Tracing command geom pid 13 tid 100014 td 0xc6ecc000 sched_switch(c6ecc000,0,104,191,5d52f7f3,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,5c,...) at mi_switch+0x219 sleepq_switch(c6ecc000,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c100f2c4,5c,c0e78e72,0,0,...) at sleepq_wait+0x63 _sleep(c100f2c4,c100f248,25c,c0e78e72,0,...) at _sleep+0x372 g_io_schedule_up(c6ecc000,0,c0e7a9c5,5f,c6b4cd14,...) at g_io_schedule_up+0x134 g_up_procbody(0,c6b4cd28,c0e8034c,390,c6dbd2bc,...) at g_up_procbody+0x9d fork_exit(c093e810,0,c6b4cd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b4cd60, ebp = 0 --- Tracing command geom pid 13 tid 100013 td 0xc6dc02e0 sched_switch(c6dc02e0,0,104,191,726036df,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,5c,...) at mi_switch+0x219 sleepq_switch(c6dc02e0,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c100f2bc,5c,c0e78e72,0,0,...) at sleepq_wait+0x63 _sleep(c100f2bc,c100f1ec,25c,c0e78e72,0,...) at _sleep+0x372 g_run_events(c101f500,0,c0e7a9c5,79,c6b49d14,...) at g_run_events+0x526 g_event_procbody(0,c6b49d28,c0e8034c,390,c6dbd2bc,...) at g_event_procbody+0x90 fork_exit(c093e8b0,0,c6b49d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b49d60, ebp = 0 --- Tracing command intr pid 12 tid 100065 td 0xc7171b80 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100064 td 0xc7172000 sched_switch(c7172000,0,109,191,75a61e11,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c7172000,...) at mi_switch+0x219 ithread_loop(c7162ae0,ecf75d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c7162ae0,ecf75d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf75d60, ebp = 0 --- Tracing command intr pid 12 tid 100062 td 0xc70365c0 sched_switch(c70365c0,0,109,191,6086d83d,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c70365c0,...) at mi_switch+0x219 ithread_loop(c7162b70,ecf65d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c7162b70,ecf65d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf65d60, ebp = 0 --- Tracing command intr pid 12 tid 100059 td 0xc7043000 sched_switch(c7043000,0,109,191,75a5cbda,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c7043000,...) at mi_switch+0x219 ithread_loop(c701db60,ecf59d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c701db60,ecf59d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf59d60, ebp = 0 --- Tracing command intr pid 12 tid 100058 td 0xc70432e0 sched_switch(c70432e0,0,109,191,913b8f90,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c70432e0,...) at mi_switch+0x219 ithread_loop(c701d210,ecf55d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c701d210,ecf55d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf55d60, ebp = 0 --- Tracing command intr pid 12 tid 100053 td 0xc70452e0 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100040 td 0xc701f000 sched_switch(c701f000,0,109,191,9a80a655,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c701f000,...) at mi_switch+0x219 ithread_loop(c70028a0,e0be6d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c70028a0,e0be6d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0be6d60, ebp = 0 --- Tracing command intr pid 12 tid 100035 td 0xc7022000 sched_switch(c7022000,0,109,191,9559d5a6,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c7022000,...) at mi_switch+0x219 ithread_loop(c7002ab0,e0bd6d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c7002ab0,e0bd6d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bd6d60, ebp = 0 --- Tracing command intr pid 12 tid 100030 td 0xc6ecc2e0 sched_switch(c6ecc2e0,0,109,191,936ba30d,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c6ecc2e0,...) at mi_switch+0x219 ithread_loop(c701d080,e0bc6d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c701d080,e0bc6d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0bc6d60, ebp = 0 --- Tracing command intr pid 12 tid 100029 td 0xc6ecc5c0 sched_switch(c6ecc5c0,0,109,191,5d521562,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c6ecc5c0,...) at mi_switch+0x219 ithread_loop(c7002340,e0a70d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c7002340,e0a70d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0a70d60, ebp = 0 --- Tracing command intr pid 12 tid 100028 td 0xc6ecc8a0 sched_switch(c6ecc8a0,0,109,191,d220434c,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c6ecc8a0,...) at mi_switch+0x219 ithread_loop(c70024e0,e0a6cd28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c70024e0,e0a6cd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe0a6cd60, ebp = 0 --- Tracing command intr pid 12 tid 100027 td 0xc6eccb80 sched_switch(c6eccb80,0,109,191,5d527ce1,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c6eccb80,...) at mi_switch+0x219 ithread_loop(c6d83380,c6b73d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c6d83380,c6b73d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b73d60, ebp = 0 --- Tracing command intr pid 12 tid 100021 td 0xc6e068a0 sched_switch(c6e068a0,0,109,191,3cb1fba4,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c6e068a0,...) at mi_switch+0x219 ithread_loop(c6d83430,c6b61d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c6d83430,c6b61d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b61d60, ebp = 0 --- Tracing command intr pid 12 tid 100020 td 0xc6e06b80 sched_switch(c6e06b80,0,109,191,d2cd6fa4,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c6e06b80,...) at mi_switch+0x219 ithread_loop(c6d83440,c6b5ed28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c6d83440,c6b5ed28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b5ed60, ebp = 0 --- Tracing command intr pid 12 tid 100018 td 0xc6ecb2e0 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100012 td 0xc6dc05c0 sched_switch(c6dc05c0,0,109,191,ca393ecb,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c6dc05c0,...) at mi_switch+0x219 ithread_loop(c6d836d0,c6b46d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c6d836d0,c6b46d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b46d60, ebp = 0 --- Tracing command intr pid 12 tid 100011 td 0xc6dc08a0 sched_switch(c6dc08a0,0,109,191,a0ce035e,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c6dc08a0,...) at mi_switch+0x219 ithread_loop(c6d836e0,c6b43d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c6d836e0,c6b43d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b43d60, ebp = 0 --- Tracing command intr pid 12 tid 100010 td 0xc6dc0b80 sched_switch(c6dc0b80,0,109,191,c3ac8d16,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c6dc0b80,...) at mi_switch+0x219 ithread_loop(c6d836f0,c6b40d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c6d836f0,c6b40d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b40d60, ebp = 0 --- Tracing command intr pid 12 tid 100009 td 0xc6e06000 sched_switch(c6e06000,0,109,191,d3045e81,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c6e06000,...) at mi_switch+0x219 ithread_loop(c6d83700,c6b3dd28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c6d83700,c6b3dd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b3dd60, ebp = 0 --- Tracing command intr pid 12 tid 100008 td 0xc6e062e0 fork_trampoline() at fork_trampoline Tracing command intr pid 12 tid 100007 td 0xc6e065c0 sched_switch(c6e065c0,0,109,191,3f67bdf7,...) at sched_switch+0x3bc mi_switch(109,0,c0e80671,554,c6e065c0,...) at mi_switch+0x219 ithread_loop(c6d83720,c6b37d28,c0e8034c,390,c6dbd578,...) at ithread_loop+0x1fb fork_exit(c097f750,c6d83720,c6b37d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b37d60, ebp = 0 --- Tracing command idle pid 11 tid 100006 td 0xc6dbf000 cpustop_handler(1,c6b33c04,c0cd25f6,ee,0,...) at cpustop_handler+0x34 ipi_nmi_handler(ee,0,11c74ce,c0cb7305,c6dbd834,...) at ipi_nmi_handler+0x2f trap(c6b33c10) at trap+0x36 calltrap() at calltrap+0x6 --- trap 0x13, eip = 0xc0cb7305, esp = 0xc6b33c50, ebp = 0xc6b33c50 --- acpi_cpu_c1(c6b33c6c,c0fd79d0,0,2f4,945,...) at acpi_cpu_c1+0x5 acpi_cpu_idle(0,c6b33ca0,c0cc5520,0,ffffffff,...) at acpi_cpu_idle+0x122 cpu_idle_acpi(0,ffffffff,c101f500,2,c6b33ce8,...) at cpu_idle_acpi+0x2f cpu_idle(0,c6b33cc4,c0e87a2b,a05,c6dbf000,...) at cpu_idle+0x90 sched_idletd(0,c6b33d28,c0e8034c,390,c6dbd834,...) at sched_idletd+0x263 fork_exit(c09ce540,0,c6b33d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b33d60, ebp = 0 --- Tracing command idle pid 11 tid 100005 td 0xc6dbf2e0 cpustop_handler(2,c6b30c04,c0cd25f6,ee,0,...) at cpustop_handler+0x34 ipi_nmi_handler(ee,0,0,c11b3b4c,c6dbd834,...) at ipi_nmi_handler+0x2f trap(c6b30c10) at trap+0x36 calltrap() at calltrap+0x6 --- trap 0x13, eip = 0xc0cb7305, esp = 0xc6b30c50, ebp = 0xc6b30c50 --- acpi_cpu_c1(c6b30c6c,c0fd79d0,1,2f4,945,...) at acpi_cpu_c1+0x5 acpi_cpu_idle(0,c6b30ca0,c0cc5520,0,ffffffff,...) at acpi_cpu_idle+0x122 cpu_idle_acpi(0,ffffffff,c101fb80,2,c6b30ce8,...) at cpu_idle_acpi+0x2f cpu_idle(0,c6b30cc4,c0e87a2b,a05,c6dbf2e0,...) at cpu_idle+0x90 sched_idletd(0,c6b30d28,c0e8034c,390,c6dbd834,...) at sched_idletd+0x263 fork_exit(c09ce540,0,c6b30d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b30d60, ebp = 0 --- Tracing command idle pid 11 tid 100004 td 0xc6dbf5c0 kdb_enter(c0d0f935,c0e4fcf0,0,c716a080,0,...) at kdb_enter+0x3a uart_intr(c716a000,0,c11b3e78,c6b2dbe8,0,...) at uart_intr+0x126 intr_event_handle(c6e04d80,c6b2dc10,c6b2dc34,0,c6fe0600,...) at intr_event_handle+0x76 intr_execute_handlers(c6e030d0,c6b2dc10,2,c6b2dc50,c0cbbd05,...) at intr_execute_handlers+0x49 lapic_handle_intr(30,c6b2dc10) at lapic_handle_intr+0x36 Xapic_isr1() at Xapic_isr1+0x35 --- interrupt, eip = 0xc0cb7305, esp = 0xc6b2dc50, ebp = 0xc6b2dc50 --- acpi_cpu_c1(c6b2dc6c,c0fd79d0,2,2f4,945,...) at acpi_cpu_c1+0x5 acpi_cpu_idle(0,c6b2dca0,c0cc5520,0,ffffffff,...) at acpi_cpu_idle+0x122 cpu_idle_acpi(0,ffffffff,c1020200,2,c6b2dce8,...) at cpu_idle_acpi+0x2f cpu_idle(0,c6b2dcc4,c0e87a2b,a05,c6dbf5c0,...) at cpu_idle+0x90 sched_idletd(0,c6b2dd28,c0e8034c,390,c6dbd834,...) at sched_idletd+0x263 fork_exit(c09ce540,0,c6b2dd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b2dd60, ebp = 0 --- Tracing command idle pid 11 tid 100003 td 0xc6dbf8a0 cpustop_handler(8,c6b2ac04,c0cd25f6,ee,0,...) at cpustop_handler+0x34 ipi_nmi_handler(ee,0,0,c11b41a4,c6dbd834,...) at ipi_nmi_handler+0x2f trap(c6b2ac10) at trap+0x36 calltrap() at calltrap+0x6 --- trap 0x13, eip = 0xc0cb7305, esp = 0xc6b2ac50, ebp = 0xc6b2ac50 --- acpi_cpu_c1(c6b2ac6c,c0fd79d0,3,2f4,945,...) at acpi_cpu_c1+0x5 acpi_cpu_idle(0,c6b2aca0,c0cc5520,0,ffffffff,...) at acpi_cpu_idle+0x122 cpu_idle_acpi(0,ffffffff,c1020880,2,c6b2ace8,...) at cpu_idle_acpi+0x2f cpu_idle(0,c6b2acc4,c0e87a2b,a05,c6dbf8a0,...) at cpu_idle+0x90 sched_idletd(0,c6b2ad28,c0e8034c,390,c6dbd834,...) at sched_idletd+0x263 fork_exit(c09ce540,0,c6b2ad28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b2ad60, ebp = 0 --- Tracing command init pid 1 tid 100002 td 0xc6dbfb80 sched_switch(c6dbfb80,0,104,191,2cc34bf,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,6c,...) at mi_switch+0x219 sleepq_switch(c6dbfb80,0,c0e8b160,1a7,6c,...) at sleepq_switch+0x162 sleepq_catch_signals(c0e8b160,160,0,100,100,...) at sleepq_catch_signals+0xf9 sleepq_wait_sig(c6dbdaf0,6c,c0e8def2,100,0,...) at sleepq_wait_sig+0x17 _sleep(c6dbdaf0,c6dbdb78,16c,c0e8def2,0,...) at _sleep+0x35c kern_wait(c6dbfb80,ffffffff,c6b26c44,2,0,...) at kern_wait+0xbe6 wait4(c6dbfb80,c6b26cec,8112908,1,0,...) at wait4+0x3b syscallenter(c6dbfb80,c6b26ce4,c0cd2d3d,c1010f70,0,...) at syscallenter+0x263 syscall(c6b26d28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (7, FreeBSD ELF32, wait4), eip = 0x8060de7, esp = 0xbfbfe81c, ebp = 0xbfbfe838 --- Tracing command audit pid 10 tid 100001 td 0xc6dc0000 sched_switch(c6dc0000,0,104,191,75a6f3e0,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c6dc0000,0,c0e8b160,268,c6dc0000,...) at sleepq_switch+0x162 sleepq_wait(c118e700,0,c6b23c8c,1,0,...) at sleepq_wait+0x63 _cv_wait(c118e700,c118e6e4,c0eb5777,194,0,...) at _cv_wait+0x243 audit_worker(0,c6b23d28,c0e8034c,390,c6dbe000,...) at audit_worker+0x84 fork_exit(c0b9aeb0,0,c6b23d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b23d60, ebp = 0 --- Tracing command kernel pid 0 tid 100076 td 0xc70458a0 sched_switch(c70458a0,0,104,191,946fab26,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c70458a0,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c71853c0,0,c0e78e72,0,0,...) at sleepq_wait+0x63 _sleep(c71853c0,c71853d8,0,c0e78e72,0,...) at _sleep+0x372 taskqueue_thread_loop(c11e61a0,ef185d28,c0e8034c,390,c100f3a0,...) at taskqueue_thread_loop+0xb5 fork_exit(c09ea9e0,c11e61a0,ef185d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xef185d60, ebp = 0 --- Tracing command kernel pid 0 tid 100066 td 0xc71718a0 sched_switch(c71718a0,0,104,191,c59d1991,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c71718a0,0,c0e8b160,28b,2,...) at sleepq_switch+0x162 sleepq_timedwait(c1011684,0,c0e78e72,2,0,...) at sleepq_timedwait+0x6b _sleep(c1011684,0,0,c0e78e72,bb8,...) at _sleep+0x342 pause(c0e78e72,bb8,124,122,c100f428,...) at pause+0x47 deadlkres(0,ecf7bd28,c0e8034c,390,c100f3a0,...) at deadlkres+0x324 fork_exit(c095ffe0,0,ecf7bd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf7bd60, ebp = 0 --- Tracing command kernel pid 0 tid 100060 td 0xc7036b80 sched_switch(c7036b80,0,104,191,d304b392,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c7036b80,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c7073900,0,c0e86781,c0e78e72,0,...) at sleepq_wait+0x63 msleep_spin(c7073900,c7073918,c0e78e72,0,c0e83fe7,...) at msleep_spin+0x21d taskqueue_thread_loop(c706f49c,ecf5fd28,c0e8034c,390,c100f3a0,...) at taskqueue_thread_loop+0x8f fork_exit(c09ea9e0,c706f49c,ecf5fd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xecf5fd60, ebp = 0 --- Tracing command kernel pid 0 tid 100026 td 0xc6fab000 sched_switch(c6fab000,0,104,191,90464aba,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c6fab000,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c6d92340,0,c0e86781,c0e78e72,0,...) at sleepq_wait+0x63 msleep_spin(c6d92340,c6d92358,c0e78e72,0,c0e83fe7,...) at msleep_spin+0x21d taskqueue_thread_loop(c0fda638,c6b70d28,c0e8034c,390,c100f3a0,...) at taskqueue_thread_loop+0x8f fork_exit(c09ea9e0,c0fda638,c6b70d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b70d60, ebp = 0 --- Tracing command kernel pid 0 tid 100025 td 0xc6fab2e0 sched_switch(c6fab2e0,0,104,191,90463d3a,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c6fab2e0,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c6d92340,0,c0e86781,c0e78e72,0,...) at sleepq_wait+0x63 msleep_spin(c6d92340,c6d92358,c0e78e72,0,c0e83fe7,...) at msleep_spin+0x21d taskqueue_thread_loop(c0fda638,c6b6dd28,c0e8034c,390,c100f3a0,...) at taskqueue_thread_loop+0x8f fork_exit(c09ea9e0,c0fda638,c6b6dd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b6dd60, ebp = 0 --- Tracing command kernel pid 0 tid 100024 td 0xc6fab5c0 sched_switch(c6fab5c0,0,104,191,90462a9f,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c6fab5c0,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c6d92340,0,c0e86781,c0e78e72,0,...) at sleepq_wait+0x63 msleep_spin(c6d92340,c6d92358,c0e78e72,0,c0e83fe7,...) at msleep_spin+0x21d taskqueue_thread_loop(c0fda638,c6b6ad28,c0e8034c,390,c100f3a0,...) at taskqueue_thread_loop+0x8f fork_exit(c09ea9e0,c0fda638,c6b6ad28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b6ad60, ebp = 0 --- Tracing command kernel pid 0 tid 100023 td 0xc6fab8a0 sched_switch(c6fab8a0,0,104,191,903f81e5,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c6fab8a0,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c6d92380,0,c0e78e72,0,0,...) at sleepq_wait+0x63 _sleep(c6d92380,c6d92398,0,c0e78e72,0,...) at _sleep+0x372 taskqueue_thread_loop(c100fd18,c6b67d28,c0e8034c,390,c100f3a0,...) at taskqueue_thread_loop+0xb5 fork_exit(c09ea9e0,c100fd18,c6b67d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b67d60, ebp = 0 --- Tracing command kernel pid 0 tid 100022 td 0xc6fabb80 sched_switch(c6fabb80,0,104,191,903f716f,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c6fabb80,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c6d923c0,0,c0e78e72,0,0,...) at sleepq_wait+0x63 _sleep(c6d923c0,c6d923d8,0,c0e78e72,0,...) at _sleep+0x372 taskqueue_thread_loop(c118ece4,c6b64d28,c0e8034c,390,c100f3a0,...) at taskqueue_thread_loop+0xb5 fork_exit(c09ea9e0,c118ece4,c6b64d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b64d60, ebp = 0 --- Tracing command kernel pid 0 tid 100019 td 0xc6ecb000 sched_switch(c6ecb000,0,104,191,92e8df37,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c6ecb000,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c6d92500,0,c0e78e72,0,0,...) at sleepq_wait+0x63 _sleep(c6d92500,c6d92518,0,c0e78e72,0,...) at _sleep+0x372 taskqueue_thread_loop(c102e228,c6b5bd28,c0e8034c,390,c100f3a0,...) at taskqueue_thread_loop+0xb5 fork_exit(c09ea9e0,c102e228,c6b5bd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b5bd60, ebp = 0 --- Tracing command kernel pid 0 tid 100016 td 0xc6ecb8a0 sched_switch(c6ecb8a0,0,104,191,b5b0fce7,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,0,...) at mi_switch+0x219 sleepq_switch(c6ecb8a0,0,c0e8b160,268,0,...) at sleepq_switch+0x162 sleepq_wait(c6d92a80,0,c0e78e72,0,0,...) at sleepq_wait+0x63 _sleep(c6d92a80,c6d92a98,0,c0e78e72,0,...) at _sleep+0x372 taskqueue_thread_loop(c102cca0,c6b52d28,c0e8034c,390,c100f3a0,...) at taskqueue_thread_loop+0xb5 fork_exit(c09ea9e0,c102cca0,c6b52d28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xc6b52d60, ebp = 0 --- Tracing command kernel pid 0 tid 100000 td 0xc100f660 sched_switch(c100f660,0,104,191,ae1ff8ae,...) at sched_switch+0x3bc mi_switch(104,0,c0e8b160,1f3,54,...) at mi_switch+0x219 sleepq_switch(c100f660,0,c0e8b160,28b,0,...) at sleepq_switch+0x162 sleepq_timedwait(c100f3a0,54,c0e8833f,0,0,...) at sleepq_timedwait+0x6b _sleep(c100f3a0,0,54,c0e8833f,2710,...) at _sleep+0x342 scheduler(0,141ec00,141ec00,141e000,1425000,...) at scheduler+0x240 mi_startup() at mi_startup+0xac begin() at begin+0x2c db:0:allt> call doadump Physical memory: 3439 MB Dumping 147 MB: 132 116 100 84 68 52 36 20 4 Dump complete = 0xf db:0:doadump> reset cpu_reset: Restarting BSP cpu_reset_proxy: Stopped CPU 2 $ svn diff -x -p /usr/src/sys Index: /usr/src/sys/ufs/ufs/ufsmount.h =================================================================== --- /usr/src/sys/ufs/ufs/ufsmount.h (revision 221878) +++ /usr/src/sys/ufs/ufs/ufsmount.h (working copy) @@ -61,6 +61,7 @@ struct jblocks; struct inodedep; TAILQ_HEAD(inodedeplst, inodedep); +LIST_HEAD(bmsafemaphd, bmsafemap); /* This structure describes the UFS specific mount structure data. */ struct ufsmount { @@ -82,7 +83,8 @@ struct ufsmount { struct workhead softdep_journal_pending; /* journal work queue */ struct worklist *softdep_journal_tail; /* Tail pointer for above */ struct jblocks *softdep_jblocks; /* Journal block information */ - struct inodedeplst softdep_unlinked; /* Unlinked inodes */ + struct inodedeplst softdep_unlinked; /* Unlinked inodes */ + struct bmsafemaphd softdep_dirtycg; /* Dirty CGs */ int softdep_on_journal; /* Items on the journal list */ int softdep_on_worklist; /* Items on the worklist */ int softdep_on_worklist_inprogress; /* Busy items on worklist */ Index: /usr/src/sys/ufs/ffs/ffs_softdep.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_softdep.c (revision 221878) +++ /usr/src/sys/ufs/ffs/ffs_softdep.c (working copy) @@ -52,6 +52,8 @@ __FBSDID("$FreeBSD$"); #define DEBUG #endif +/* #define SUJ_DEBUG 1 */ + #include #include #include @@ -214,6 +216,17 @@ softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, } void +softdep_journal_freeblocks(ip, cred, length, flags) + struct inode *ip; + struct ucred *cred; + off_t length; + int flags; +{ + + panic("softdep_journal_freeblocks called"); +} + +void softdep_setup_freeblocks(ip, length, flags) struct inode *ip; off_t length; @@ -618,6 +631,7 @@ FEATURE(softupdates, "FFS soft-updates support"); unsigned long dep_current[D_LAST + 1]; unsigned long dep_total[D_LAST + 1]; +unsigned long dep_write[D_LAST + 1]; SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0, "soft updates stats"); @@ -625,13 +639,17 @@ SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFL "total dependencies allocated"); SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0, "current dependencies allocated"); +SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0, + "current dependencies written"); #define SOFTDEP_TYPE(type, str, long) \ static MALLOC_DEFINE(M_ ## type, #str, long); \ SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD, \ &dep_total[D_ ## type], 0, ""); \ SYSCTL_ULONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD, \ - &dep_current[D_ ## type], 0, ""); + &dep_current[D_ ## type], 0, ""); \ + SYSCTL_ULONG(_debug_softdep_write, OID_AUTO, str, CTLFLAG_RD, \ + &dep_write[D_ ## type], 0, ""); SOFTDEP_TYPE(PAGEDEP, pagedep, "File page dependencies"); SOFTDEP_TYPE(INODEDEP, inodedep, "Inode dependencies"); @@ -736,8 +754,10 @@ static int flush_pagedep_deps(struct vnode *, stru struct diraddhd *); static void free_pagedep(struct pagedep *); static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t); -static int flush_inodedep_deps(struct mount *, ino_t); +static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t); static int flush_deplist(struct allocdirectlst *, int, int *); +static int flush_trunclist(struct vnode *, struct inodedep *, int, int *); +static void sync_cgs(struct mount *, int); static int handle_written_filepage(struct pagedep *, struct buf *); static int handle_written_sbdep(struct sbdep *, struct buf *); static void initiate_write_sbdep(struct sbdep *); @@ -752,6 +772,7 @@ static void handle_written_jseg(struct jseg *, str static void handle_written_jnewblk(struct jnewblk *); static void handle_written_jfreeblk(struct jfreeblk *); static void handle_written_jfreefrag(struct jfreefrag *); +static void handle_written_jtrunc(struct jtrunc *); static void complete_jseg(struct jseg *); static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *); static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *); @@ -768,7 +789,9 @@ static void handle_allocdirect_partdone(struct all static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *, struct workhead *); static void indirdep_complete(struct indirdep *); -static int indirblk_inseg(struct mount *, ufs2_daddr_t); +static int indirblk_lookup(struct mount *, ufs2_daddr_t); +static void indirblk_insert(struct freework *); +static void indirblk_remove(struct freework *); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); static void initiate_write_indirdep(struct indirdep*, struct buf *); @@ -780,7 +803,9 @@ static void handle_workitem_freefile(struct freefi static void handle_workitem_remove(struct dirrem *, struct vnode *); static struct dirrem *newdirrem(struct buf *, struct inode *, struct inode *, int, struct dirrem **); -static void cancel_indirdep(struct indirdep *, struct buf *, struct inodedep *, +static struct indirdep *indirdep_lookup(struct mount *, struct inode *, + struct buf *); +static void cancel_indirdep(struct indirdep *, struct buf *, struct freeblks *); static void free_indirdep(struct indirdep *); static void free_diradd(struct diradd *, struct workhead *); @@ -795,8 +820,13 @@ static void cancel_diradd(struct diradd *, struct struct jremref *, struct jremref *); static void dirrem_journal(struct dirrem *, struct jremref *, struct jremref *, struct jremref *); -static void cancel_allocindir(struct allocindir *, struct inodedep *, - struct freeblks *); +static void cancel_allocindir(struct allocindir *, struct buf *bp, + struct freeblks *, int); +static int setup_trunc_indir(struct freeblks *, struct inode *, + ufs_lbn_t, ufs_lbn_t, ufs2_daddr_t, int); +static void complete_trunc_indir(struct freework *); +static void trunc_indirdep(struct indirdep *, struct freeblks *, struct buf *, + int); static void complete_mkdir(struct mkdir *); static void free_newdirblk(struct newdirblk *); static void free_jremref(struct jremref *); @@ -818,30 +848,29 @@ static void cancel_jfreefrag(struct jfreefrag *); static inline void setup_freedirect(struct freeblks *, struct inode *, int, int); static inline void setup_freeext(struct freeblks *, struct inode *, int, int); -static inline void setup_freeindir(struct freeblks *, struct inode *, int i, +static inline void setup_freeindir(struct freeblks *, struct inode *, int, ufs_lbn_t, int); static inline struct freeblks *newfreeblks(struct mount *, struct inode *); static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t); -static void softdep_trunc_deps(struct vnode *, struct freeblks *, ufs_lbn_t, +static void trunc_dependencies(struct inode *, struct freeblks *, ufs_lbn_t, int, int); -static int cancel_pagedep(struct pagedep *, struct inodedep *, - struct freeblks *); -static int deallocate_dependencies(struct buf *, struct inodedep *, - struct freeblks *, int off); +static int cancel_pagedep(struct pagedep *, struct freeblks *, int); +static int deallocate_dependencies(struct buf *, struct freeblks *, int); +static void newblk_freefrag(struct newblk*); static void free_newblk(struct newblk *); static void cancel_allocdirect(struct allocdirectlst *, - struct allocdirect *, struct freeblks *, int); + struct allocdirect *, struct freeblks *); static int check_inode_unwritten(struct inodedep *); static int free_inodedep(struct inodedep *); static void freework_freeblock(struct freework *); static void handle_workitem_freeblocks(struct freeblks *, int); -static void handle_complete_freeblocks(struct freeblks *); +static void handle_complete_freeblocks(struct freeblks *, int); static void handle_workitem_indirblk(struct freework *); static void handle_written_freework(struct freework *); static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *); static struct worklist *jnewblk_merge(struct worklist *, struct worklist *, struct workhead *); -static void setup_allocindir_phase2(struct buf *, struct inode *, +static struct freefrag *setup_allocindir_phase2(struct buf *, struct inode *, struct inodedep *, struct allocindir *, ufs_lbn_t); static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t, ufs2_daddr_t, ufs_lbn_t); @@ -870,7 +899,9 @@ static void pause_timer(void *); static int request_cleanup(struct mount *, int); static int process_worklist_item(struct mount *, int); static void process_removes(struct vnode *); +static void process_truncates(struct vnode *); static void jwork_move(struct workhead *, struct workhead *); +static void jwork_insert(struct workhead *, struct jsegdep *); static void add_to_worklist(struct worklist *, int); static void remove_from_worklist(struct worklist *); static void softdep_flush(void); @@ -894,11 +925,12 @@ static inline void newinoref(struct inoref *, ino_ static inline struct jsegdep *inoref_jseg(struct inoref *); static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t); static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t, - ufs2_daddr_t, int); + ufs2_daddr_t, int, int); +static void cancel_jfreeblk(struct freeblks *, ufs2_daddr_t); static struct jfreefrag *newjfreefrag(struct freefrag *, struct inode *, ufs2_daddr_t, long, ufs_lbn_t); static struct freework *newfreework(struct ufsmount *, struct freeblks *, - struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int); + struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int, int); static void jwait(struct worklist *wk); static struct inodedep *inodedep_lookup_ip(struct inode *); static int bmsafemap_rollbacks(struct bmsafemap *); @@ -1064,6 +1096,30 @@ jwork_move(dst, src) } } +static void +jwork_insert(dst, jsegdep) + struct workhead *dst; + struct jsegdep *jsegdep; +{ + struct jsegdep *jsegdepn; + struct worklist *wk; + + LIST_FOREACH(wk, dst, wk_list) + if (wk->wk_type == D_JSEGDEP) + break; + if (wk == NULL) { + WORKLIST_INSERT(dst, &jsegdep->jd_list); + return; + } + jsegdepn = WK_JSEGDEP(wk); + if (jsegdep->jd_seg->js_seq < jsegdepn->jd_seg->js_seq) { + WORKLIST_REMOVE(wk); + free_jsegdep(jsegdepn); + WORKLIST_INSERT(dst, &jsegdep->jd_list); + } else + free_jsegdep(jsegdep); +} + /* * Routines for tracking and managing workitems. */ @@ -1101,14 +1157,18 @@ workitem_alloc(item, type, mp) int type; struct mount *mp; { + struct ufsmount *ump; + item->wk_type = type; item->wk_mp = mp; item->wk_state = 0; + + ump = VFSTOUFS(mp); ACQUIRE_LOCK(&lk); dep_current[type]++; dep_total[type]++; - VFSTOUFS(mp)->softdep_deps++; - VFSTOUFS(mp)->softdep_accdeps++; + ump->softdep_deps++; + ump->softdep_accdeps++; FREE_LOCK(&lk); } @@ -1478,6 +1538,43 @@ process_removes(vp) } /* + * Process all truncations associated with a vnode if we are running out + * of journal space. This is called when the vnode lock is already held + * and no other process can clear the truncation. + */ +static void +process_truncates(vp) + struct vnode *vp; +{ + struct inodedep *inodedep; + struct freeblks *freeblks; + struct mount *mp; + ino_t inum; + + mtx_assert(&lk, MA_OWNED); + + mp = vp->v_mount; + inum = VTOI(vp)->i_number; + for (;;) { + if (inodedep_lookup(mp, inum, 0, &inodedep) == 0) + return; + TAILQ_FOREACH(freeblks, &inodedep->id_freeblklst, fb_next) + if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST)) == + (ALLCOMPLETE | ONWORKLIST)) + break; + if (freeblks == NULL) + return; + remove_from_worklist(&freeblks->fb_list); + FREE_LOCK(&lk); + if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) + panic("process_removes: suspended filesystem"); + handle_workitem_freeblocks(freeblks, 0); + vn_finished_secondary_write(mp); + ACQUIRE_LOCK(&lk); + } +} + +/* * Process one item on the worklist. */ static int @@ -1560,7 +1657,9 @@ process_worklist_item(mp, flags) case D_FREEWORK: /* Final block in an indirect was freed. */ + ACQUIRE_LOCK(&lk); handle_workitem_indirblk(WK_FREEWORK(wk)); + FREE_LOCK(&lk); break; default: @@ -1922,6 +2021,7 @@ inodedep_lookup(mp, inum, flags, inodedeppp) TAILQ_INIT(&inodedep->id_newinoupdt); TAILQ_INIT(&inodedep->id_extupdt); TAILQ_INIT(&inodedep->id_newextupdt); + TAILQ_INIT(&inodedep->id_freeblklst); LIST_INSERT_HEAD(inodedephd, inodedep, id_hash); *inodedeppp = inodedep; return (0); @@ -2008,43 +2108,86 @@ newblk_lookup(mp, newblkno, flags, newblkpp) } /* - * Structures and routines associated with indir caching. + * Structures and routines associated with freed indirect block caching. */ -struct workhead *indir_hashtbl; +struct freeworklst *indir_hashtbl; u_long indir_hash; /* size of hash table - 1 */ #define INDIR_HASH(mp, blkno) \ (&indir_hashtbl[((((register_t)(mp)) >> 13) + (blkno)) & indir_hash]) +/* + * Lookup an indirect block in the indir hash table. The freework is + * removed and potentially freed. The caller must do a blocking journal + * write before writing to the blkno. + */ static int -indirblk_inseg(mp, blkno) +indirblk_lookup(mp, blkno) struct mount *mp; ufs2_daddr_t blkno; { struct freework *freework; - struct workhead *wkhd; - struct worklist *wk; + struct freeworklst *wkhd; wkhd = INDIR_HASH(mp, blkno); - LIST_FOREACH(wk, wkhd, wk_list) { - freework = WK_FREEWORK(wk); - if (freework->fw_blkno == blkno && - freework->fw_list.wk_mp == mp) { - LIST_REMOVE(freework, fw_next); - WORKLIST_REMOVE(&freework->fw_list); - WORKITEM_FREE(freework, D_FREEWORK); - return (1); - } + TAILQ_FOREACH(freework, wkhd, fw_next) { + if (freework->fw_blkno != blkno) + continue; + if (freework->fw_list.wk_mp != mp) + continue; + indirblk_remove(freework); + return (1); } return (0); } /* + * Insert an indirect block represented by freework into the indirblk + * hash table so that it may prevent the block from being re-used prior + * to the journal being written. + */ +static void +indirblk_insert(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct jsegdep *jsegdep; + struct worklist *wk; + + freeblks = freework->fw_freeblks; + LIST_FOREACH(wk, &freeblks->fb_jwork, wk_list) + if (wk->wk_type == D_JSEGDEP) + break; + if (wk == NULL) + return; + + jsegdep = WK_JSEGDEP(wk); + LIST_INSERT_HEAD(&jsegdep->jd_seg->js_indirs, freework, fw_segs); + TAILQ_INSERT_HEAD(INDIR_HASH(freework->fw_list.wk_mp, + freework->fw_blkno), freework, fw_next); + freework->fw_state &= ~DEPCOMPLETE; +} + +static void +indirblk_remove(freework) + struct freework *freework; +{ + + LIST_REMOVE(freework, fw_segs); + TAILQ_REMOVE(INDIR_HASH(freework->fw_list.wk_mp, + freework->fw_blkno), freework, fw_next); + freework->fw_state |= DEPCOMPLETE; + if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE) + WORKITEM_FREE(freework, D_FREEWORK); +} + +/* * Executed during filesystem system initialization before * mounting any filesystems. */ void softdep_initialize() { + int i; LIST_INIT(&mkdirlisthd); max_softdeps = desiredvnodes * 4; @@ -2052,7 +2195,12 @@ softdep_initialize() inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash); newblk_hashtbl = hashinit(desiredvnodes / 5, M_NEWBLK, &newblk_hash); bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, &bmsafemap_hash); - indir_hashtbl = hashinit(desiredvnodes / 10, M_FREEWORK, &indir_hash); + i = 1 << (ffs(desiredvnodes / 10) - 1); + indir_hashtbl = malloc(i * sizeof(indir_hashtbl[0]), M_FREEWORK, + M_WAITOK); + indir_hash = i - 1; + for (i = 0; i <= indir_hash; i++) + TAILQ_INIT(&indir_hashtbl[i]); /* initialise bioops hack */ bioops.io_start = softdep_disk_io_initiation; @@ -2077,6 +2225,7 @@ softdep_uninitialize() hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash); hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash); hashdestroy(bmsafemap_hashtbl, M_BMSAFEMAP, bmsafemap_hash); + free(indir_hashtbl, M_FREEWORK); } /* @@ -2108,6 +2257,7 @@ softdep_mount(devvp, mp, fs, cred) LIST_INIT(&ump->softdep_workitem_pending); LIST_INIT(&ump->softdep_journal_pending); TAILQ_INIT(&ump->softdep_unlinked); + LIST_INIT(&ump->softdep_dirtycg); ump->softdep_worklist_tail = NULL; ump->softdep_on_worklist = 0; ump->softdep_deps = 0; @@ -2570,6 +2720,7 @@ softdep_prealloc(vp, waitok) ffs_syncvnode(vp, waitok); ACQUIRE_LOCK(&lk); process_removes(vp); + process_truncates(vp); if (journal_space(ump, 0) == 0) { softdep_speedup(); if (journal_space(ump, 1) == 0) @@ -2604,9 +2755,12 @@ softdep_prelink(dvp, vp) ffs_syncvnode(dvp, MNT_WAIT); ACQUIRE_LOCK(&lk); /* Process vp before dvp as it may create .. removes. */ - if (vp) + if (vp) { process_removes(vp); + process_truncates(vp); + } process_removes(dvp); + process_truncates(dvp); softdep_speedup(); process_worklist_item(UFSTOVFS(ump), LK_NOWAIT); process_worklist_item(UFSTOVFS(ump), LK_NOWAIT); @@ -2753,6 +2907,7 @@ jtrunc_write(jtrunc, jseg, data) { struct jtrncrec *rec; + jtrunc->jt_jsegdep->jd_seg = jseg; rec = (struct jtrncrec *)data; rec->jt_op = JOP_TRUNC; rec->jt_ino = jtrunc->jt_ino; @@ -3013,7 +3168,8 @@ complete_jseg(jseg) rele_jseg(jseg); jmvref = WK_JMVREF(wk); LIST_REMOVE(jmvref, jm_deps); - free_pagedep(jmvref->jm_pagedep); + if ((jmvref->jm_pagedep->pd_state & ONWORKLIST) == 0) + free_pagedep(jmvref->jm_pagedep); WORKITEM_FREE(jmvref, D_JMVREF); break; case D_JNEWBLK: @@ -3026,8 +3182,7 @@ complete_jseg(jseg) handle_written_jfreefrag(WK_JFREEFRAG(wk)); break; case D_JTRUNC: - WK_JTRUNC(wk)->jt_jsegdep->jd_seg = jseg; - WORKITEM_FREE(wk, D_JTRUNC); + handle_written_jtrunc(WK_JTRUNC(wk)); break; default: panic("handle_written_jseg: Unknown type %s", @@ -3082,6 +3237,27 @@ handle_written_jseg(jseg, bp) free_jsegs(jblocks); } +static void +handle_written_jtrunc(jtrunc) + struct jtrunc *jtrunc; +{ + struct freeblks *freeblks; + + freeblks = jtrunc->jt_freeblks; + freeblks->fb_jtrunc = NULL; + jwork_insert(&freeblks->fb_jwork, jtrunc->jt_jsegdep); + /* + * If the freeblks is all journaled, we can add it to the worklist. + */ + if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) { + /* Remove from the b_dep that is waiting on this write. */ + if (freeblks->fb_state & ONWORKLIST) + WORKLIST_REMOVE(&freeblks->fb_list); + add_to_worklist(&freeblks->fb_list, 1); + } + WORKITEM_FREE(jtrunc, D_JTRUNC); +} + static inline struct jsegdep * inoref_jseg(inoref) struct inoref *inoref; @@ -3123,7 +3299,7 @@ handle_written_jremref(jremref) jremref->jr_dirrem = NULL; LIST_REMOVE(jremref, jr_deps); jsegdep->jd_state |= jremref->jr_state & MKDIR_PARENT; - WORKLIST_INSERT(&dirrem->dm_jwork, &jsegdep->jd_list); + jwork_insert(&dirrem->dm_jwork, jsegdep); if (LIST_EMPTY(&dirrem->dm_jremrefhd) && (dirrem->dm_state & COMPLETE) != 0) add_to_worklist(&dirrem->dm_list, 0); @@ -3183,7 +3359,7 @@ handle_written_jaddref(jaddref) mkdir->md_state |= DEPCOMPLETE; complete_mkdir(mkdir); } - WORKLIST_INSERT(&diradd->da_jwork, &jsegdep->jd_list); + jwork_insert(&diradd->da_jwork, jsegdep); if (jaddref->ja_state & NEWBLOCK) { inodedep->id_state |= ONDEPLIST; LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_inodedephd, @@ -3205,10 +3381,9 @@ handle_written_jnewblk(jnewblk) { struct bmsafemap *bmsafemap; struct freefrag *freefrag; + struct freework *freework; struct jsegdep *jsegdep; struct newblk *newblk; - struct freework *freework; - struct indirdep *indirdep; /* Grab the jsegdep. */ jsegdep = jnewblk->jn_jsegdep; @@ -3225,10 +3400,13 @@ handle_written_jnewblk(jnewblk) */ newblk = WK_NEWBLK(jnewblk->jn_dep); newblk->nb_jnewblk = NULL; - bmsafemap = newblk->nb_bmsafemap; - newblk->nb_state |= ONDEPLIST; - LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); - WORKLIST_INSERT(&newblk->nb_jwork, &jsegdep->jd_list); + if ((newblk->nb_state & GOINGAWAY) == 0) { + bmsafemap = newblk->nb_bmsafemap; + newblk->nb_state |= ONDEPLIST; + LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, + nb_deps); + } + jwork_insert(&newblk->nb_jwork, jsegdep); break; case D_FREEFRAG: /* @@ -3245,16 +3423,9 @@ handle_written_jnewblk(jnewblk) */ freework = WK_FREEWORK(jnewblk->jn_dep); freework->fw_jnewblk = NULL; - WORKLIST_INSERT(&freework->fw_jwork, &jsegdep->jd_list); + WORKLIST_INSERT(&freework->fw_freeblks->fb_jwork, + &jsegdep->jd_list); break; - case D_INDIRDEP: - /* - * An indirect block was removed by truncate. - */ - indirdep = WK_INDIRDEP(jnewblk->jn_dep); - LIST_REMOVE(jnewblk, jn_indirdeps); - WORKLIST_INSERT(&indirdep->ir_jwork, &jsegdep->jd_list); - break; default: panic("handle_written_jnewblk: Unknown type %d.", jnewblk->jn_dep->wk_type); @@ -3321,7 +3492,7 @@ handle_written_jfreefrag(jfreefrag) panic("handle_written_jfreefrag: No freefrag."); freefrag->ff_state |= DEPCOMPLETE; freefrag->ff_jdep = NULL; - WORKLIST_INSERT(&freefrag->ff_jwork, &jsegdep->jd_list); + jwork_insert(&freefrag->ff_jwork, jsegdep); if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) add_to_worklist(&freefrag->ff_list, 0); jfreefrag->fr_freefrag = NULL; @@ -3351,12 +3522,8 @@ handle_written_jfreeblk(jfreeblk) * If the freeblks is all journaled, we can add it to the worklist. */ if (LIST_EMPTY(&freeblks->fb_jfreeblkhd) && - (freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) { - /* Remove from the b_dep that is waiting on this write. */ - if (freeblks->fb_state & ONWORKLIST) - WORKLIST_REMOVE(&freeblks->fb_list); + (freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) add_to_worklist(&freeblks->fb_list, 1); - } free_jfreeblk(jfreeblk); } @@ -3493,51 +3660,79 @@ free_freedep(freedep) * is visible outside of softdep_setup_freeblocks(). */ static struct freework * -newfreework(ump, freeblks, parent, lbn, nb, frags, journal) +newfreework(ump, freeblks, parent, lbn, nb, frags, off, journal) struct ufsmount *ump; struct freeblks *freeblks; struct freework *parent; ufs_lbn_t lbn; ufs2_daddr_t nb; int frags; + int off; int journal; { + struct jfreeblk *jfreeblk; struct freework *freework; freework = malloc(sizeof(*freework), M_FREEWORK, M_SOFTDEP_FLAGS); workitem_alloc(&freework->fw_list, D_FREEWORK, freeblks->fb_list.wk_mp); + freework->fw_state = ATTACHED; freework->fw_jnewblk = NULL; freework->fw_freeblks = freeblks; freework->fw_parent = parent; freework->fw_lbn = lbn; freework->fw_blkno = nb; freework->fw_frags = frags; + freework->fw_indir = NULL; freework->fw_ref = ((UFSTOVFS(ump)->mnt_kern_flag & MNTK_SUJ) == 0 || lbn >= -NXADDR) ? 0 : NINDIR(ump->um_fs) + 1; - freework->fw_off = 0; - LIST_INIT(&freework->fw_jwork); - + freework->fw_start = freework->fw_off = off; + jfreeblk = NULL; + if (journal) { + jfreeblk = newjfreeblk(freeblks, lbn, nb, frags, off); + LIST_INSERT_HEAD(&freeblks->fb_jfreeblkhd, jfreeblk, jf_deps); + } if (parent == NULL) { - WORKLIST_INSERT_UNLOCKED(&freeblks->fb_freeworkhd, - &freework->fw_list); + ACQUIRE_LOCK(&lk); + freework->fw_state |= ONDEPLIST; + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list); freeblks->fb_ref++; + FREE_LOCK(&lk); } - if (journal) - newjfreeblk(freeblks, lbn, nb, frags); return (freework); } /* + * Eliminate a jfreeblk for a block that does not need journaling. + */ +static void +cancel_jfreeblk(freeblks, blkno) + struct freeblks *freeblks; + ufs2_daddr_t blkno; +{ + struct jfreeblk *jfreeblk; + + LIST_FOREACH(jfreeblk, &freeblks->fb_jfreeblkhd, jf_deps) + if (jfreeblk->jf_blkno == blkno) + break; + if (jfreeblk == NULL) + return; + free_jsegdep(jfreeblk->jf_jsegdep); + LIST_REMOVE(jfreeblk, jf_deps); + WORKITEM_FREE(jfreeblk, D_JFREEBLK); +} + +/* * Allocate a new jfreeblk to journal top level block pointer when truncating * a file. The caller must add this to the worklist when lk is held. */ static struct jfreeblk * -newjfreeblk(freeblks, lbn, blkno, frags) +newjfreeblk(freeblks, lbn, blkno, frags, off) struct freeblks *freeblks; ufs_lbn_t lbn; ufs2_daddr_t blkno; int frags; + int off; { struct jfreeblk *jfreeblk; @@ -3545,12 +3740,12 @@ static struct jfreeblk * workitem_alloc(&jfreeblk->jf_list, D_JFREEBLK, freeblks->fb_list.wk_mp); jfreeblk->jf_jsegdep = newjsegdep(&jfreeblk->jf_list); jfreeblk->jf_state = ATTACHED | DEPCOMPLETE; - jfreeblk->jf_ino = freeblks->fb_previousinum; + jfreeblk->jf_ino = freeblks->fb_inum; jfreeblk->jf_lbn = lbn; jfreeblk->jf_blkno = blkno; jfreeblk->jf_frags = frags; + jfreeblk->jf_off = off; jfreeblk->jf_freeblks = freeblks; - LIST_INSERT_HEAD(&freeblks->fb_jfreeblkhd, jfreeblk, jf_deps); return (jfreeblk); } @@ -3645,7 +3840,7 @@ cancel_jaddref(jaddref, inodedep, wkhd) if (jaddref->ja_state & IOSTARTED) { jaddref->ja_state &= ~IOSTARTED; WORKLIST_REMOVE(&jaddref->ja_list); - WORKLIST_INSERT(wkhd, &jsegdep->jd_list); + jwork_insert(wkhd, jsegdep); } else { free_jsegdep(jsegdep); if (jaddref->ja_state & DEPCOMPLETE) @@ -3734,11 +3929,7 @@ free_jnewblk(jnewblk) } /* - * Cancel a jnewblk which has been superseded by a freeblk. The jnewblk - * is kept linked into the bmsafemap until the free completes, thus - * preventing the modified state from ever reaching disk. The free - * routine must pass this structure via ffs_blkfree() to - * softdep_setup_freeblks() so there is no race in releasing the space. + * Cancel a jnewblk which has been been made redundant by frag extension. */ static void cancel_jnewblk(jnewblk, wkhd) @@ -3756,7 +3947,7 @@ cancel_jnewblk(jnewblk, wkhd) if (jnewblk->jn_state & IOSTARTED) { jnewblk->jn_state &= ~IOSTARTED; WORKLIST_REMOVE(&jnewblk->jn_list); - WORKLIST_INSERT(wkhd, &jsegdep->jd_list); + jwork_insert(wkhd, jsegdep); } else { free_jsegdep(jsegdep); remove_from_journal(&jnewblk->jn_list); @@ -3792,11 +3983,8 @@ free_jseg(jseg, jblocks) * Free freework structures that were lingering to indicate freed * indirect blocks that forced journal write ordering on reallocate. */ - while ((freework = LIST_FIRST(&jseg->js_indirs)) != NULL) { - LIST_REMOVE(freework, fw_next); - WORKLIST_REMOVE(&freework->fw_list); - WORKITEM_FREE(freework, D_FREEWORK); - } + while ((freework = LIST_FIRST(&jseg->js_indirs)) != NULL) + indirblk_remove(freework); if (jblocks->jb_oldestseg == jseg) jblocks->jb_oldestseg = TAILQ_NEXT(jseg, js_next); TAILQ_REMOVE(&jblocks->jb_segs, jseg, js_next); @@ -3928,68 +4116,6 @@ inodedep_lookup_ip(ip) } /* - * Create a journal entry that describes a truncate that we're about to - * perform. The inode allocations and frees between here and the completion - * of the operation are done asynchronously and without journaling. At - * the end of the operation the vnode is sync'd and the journal space - * is released. Recovery will discover the partially completed truncate - * and complete it. - */ -void * -softdep_setup_trunc(vp, length, flags) - struct vnode *vp; - off_t length; - int flags; -{ - struct jsegdep *jsegdep; - struct jtrunc *jtrunc; - struct ufsmount *ump; - struct inode *ip; - - softdep_prealloc(vp, MNT_WAIT); - ip = VTOI(vp); - ump = VFSTOUFS(vp->v_mount); - jtrunc = malloc(sizeof(*jtrunc), M_JTRUNC, M_SOFTDEP_FLAGS); - workitem_alloc(&jtrunc->jt_list, D_JTRUNC, vp->v_mount); - jsegdep = jtrunc->jt_jsegdep = newjsegdep(&jtrunc->jt_list); - jtrunc->jt_ino = ip->i_number; - jtrunc->jt_extsize = 0; - jtrunc->jt_size = length; - if ((flags & IO_EXT) == 0 && ump->um_fstype == UFS2) - jtrunc->jt_extsize = ip->i_din2->di_extsize; - if ((flags & IO_NORMAL) == 0) - jtrunc->jt_size = DIP(ip, i_size); - ACQUIRE_LOCK(&lk); - add_to_journal(&jtrunc->jt_list); - while (jsegdep->jd_seg == NULL) { - stat_jwait_freeblks++; - jwait(&jtrunc->jt_list); - } - FREE_LOCK(&lk); - - return (jsegdep); -} - -/* - * After synchronous truncation is complete we free sync the vnode and - * release the jsegdep so the journal space can be freed. - */ -int -softdep_complete_trunc(vp, cookie) - struct vnode *vp; - void *cookie; -{ - int error; - - error = ffs_syncvnode(vp, MNT_WAIT); - ACQUIRE_LOCK(&lk); - free_jsegdep((struct jsegdep *)cookie); - FREE_LOCK(&lk); - - return (error); -} - -/* * Called prior to creating a new inode and linking it to a directory. The * jaddref structure must already be allocated by softdep_setup_inomapdep * and it is discovered here so we can initialize the mode and update @@ -4523,6 +4649,8 @@ bmsafemap_lookup(mp, bp, cg) LIST_INIT(&bmsafemap->sm_newblkwr); LIST_INIT(&bmsafemap->sm_jaddrefhd); LIST_INIT(&bmsafemap->sm_jnewblkhd); + LIST_INIT(&bmsafemap->sm_freehd); + LIST_INIT(&bmsafemap->sm_freewr); ACQUIRE_LOCK(&lk); if (bmsafemap_find(bmsafemaphd, mp, cg, &collision) == 1) { WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); @@ -4530,6 +4658,7 @@ bmsafemap_lookup(mp, bp, cg) } bmsafemap->sm_cg = cg; LIST_INSERT_HEAD(bmsafemaphd, bmsafemap, sm_hash); + LIST_INSERT_HEAD(&VFSTOUFS(mp)->softdep_dirtycg, bmsafemap, sm_next); WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); return (bmsafemap); } @@ -4814,9 +4943,8 @@ allocdirect_merge(adphead, newadp, oldadp) &oldadp->ad_block.nb_jnewblk->jn_list, &newadp->ad_block.nb_jwork); oldadp->ad_block.nb_jnewblk = NULL; - if (cancel_newblk(&oldadp->ad_block, NULL, - &newadp->ad_block.nb_jwork)) - panic("allocdirect_merge: Unexpected dependency."); + cancel_newblk(&oldadp->ad_block, NULL, + &newadp->ad_block.nb_jwork); } else { wk = (struct worklist *) cancel_newblk(&oldadp->ad_block, &freefrag->ff_list, &freefrag->ff_jwork); @@ -5084,6 +5212,7 @@ newallocindir(ip, ptrno, newblkno, oldblkno, lbn) aip = (struct allocindir *)newblk; aip->ai_offset = ptrno; aip->ai_oldblkno = oldblkno; + aip->ai_lbn = lbn; if ((jnewblk = newblk->nb_jnewblk) != NULL) { jnewblk->jn_ino = ip->i_number; jnewblk->jn_lbn = lbn; @@ -5110,6 +5239,7 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, struct buf *nbp; /* buffer holding allocated page */ { struct inodedep *inodedep; + struct freefrag *freefrag; struct allocindir *aip; struct pagedep *pagedep; struct mount *mp; @@ -5130,8 +5260,10 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, pagedep_lookup(mp, ip->i_number, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); - setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); + freefrag = setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); FREE_LOCK(&lk); + if (freefrag) + handle_workitem_freefrag(freefrag); } /* @@ -5155,7 +5287,8 @@ softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, aip = newallocindir(ip, ptrno, newblkno, 0, lbn); inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC, &inodedep); WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); - setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); + if (setup_allocindir_phase2(bp, ip, inodedep, aip, lbn)) + panic("softdep_setup_allocindir_meta: Block already existed"); FREE_LOCK(&lk); } @@ -5166,7 +5299,7 @@ indirdep_complete(indirdep) struct allocindir *aip; LIST_REMOVE(indirdep, ir_next); - indirdep->ir_state &= ~ONDEPLIST; + indirdep->ir_state |= DEPCOMPLETE; while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) { LIST_REMOVE(aip, ai_next); @@ -5181,127 +5314,134 @@ indirdep_complete(indirdep) free_indirdep(indirdep); } -/* - * Called to finish the allocation of the "aip" allocated - * by one of the two routines above. - */ -static void -setup_allocindir_phase2(bp, ip, inodedep, aip, lbn) - struct buf *bp; /* in-memory copy of the indirect block */ - struct inode *ip; /* inode for file being extended */ - struct inodedep *inodedep; /* Inodedep for ip */ - struct allocindir *aip; /* allocindir allocated by the above routines */ - ufs_lbn_t lbn; /* Logical block number for this block. */ +static struct indirdep * +indirdep_lookup(mp, ip, bp) + struct mount *mp; + struct inode *ip; + struct buf *bp; { + struct indirdep *indirdep, *newindirdep; + struct newblk *newblk; struct worklist *wk; struct fs *fs; - struct newblk *newblk; - struct indirdep *indirdep, *newindirdep; - struct allocindir *oldaip; - struct freefrag *freefrag; - struct mount *mp; ufs2_daddr_t blkno; - mp = UFSTOVFS(ip->i_ump); + mtx_assert(&lk, MA_OWNED); + indirdep = NULL; + newindirdep = NULL; fs = ip->i_fs; - mtx_assert(&lk, MA_OWNED); - if (bp->b_lblkno >= 0) - panic("setup_allocindir_phase2: not indir blk"); - for (freefrag = NULL, indirdep = NULL, newindirdep = NULL; ; ) { + for (;;) { LIST_FOREACH(wk, &bp->b_dep, wk_list) { if (wk->wk_type != D_INDIRDEP) continue; indirdep = WK_INDIRDEP(wk); break; } - if (indirdep == NULL && newindirdep) { - indirdep = newindirdep; - newindirdep = NULL; - WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); - if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, - &newblk)) { - indirdep->ir_state |= ONDEPLIST; - LIST_INSERT_HEAD(&newblk->nb_indirdeps, - indirdep, ir_next); - } else - indirdep->ir_state |= DEPCOMPLETE; - } - if (indirdep) { - aip->ai_indirdep = indirdep; - /* - * Check to see if there is an existing dependency - * for this block. If there is, merge the old - * dependency into the new one. This happens - * as a result of reallocblk only. - */ - if (aip->ai_oldblkno == 0) - oldaip = NULL; - else - - LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, - ai_next) - if (oldaip->ai_offset == aip->ai_offset) - break; - if (oldaip != NULL) - freefrag = allocindir_merge(aip, oldaip); - LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); - KASSERT(aip->ai_offset >= 0 && - aip->ai_offset < NINDIR(ip->i_ump->um_fs), - ("setup_allocindir_phase2: Bad offset %d", - aip->ai_offset)); - KASSERT(indirdep->ir_savebp != NULL, - ("setup_allocindir_phase2 NULL ir_savebp")); - if (ip->i_ump->um_fstype == UFS1) - ((ufs1_daddr_t *)indirdep->ir_savebp->b_data) - [aip->ai_offset] = aip->ai_oldblkno; - else - ((ufs2_daddr_t *)indirdep->ir_savebp->b_data) - [aip->ai_offset] = aip->ai_oldblkno; - FREE_LOCK(&lk); - if (freefrag != NULL) - handle_workitem_freefrag(freefrag); - } else - FREE_LOCK(&lk); - if (newindirdep) { - newindirdep->ir_savebp->b_flags |= B_INVAL | B_NOCACHE; - brelse(newindirdep->ir_savebp); - ACQUIRE_LOCK(&lk); - WORKITEM_FREE((caddr_t)newindirdep, D_INDIRDEP); - if (indirdep) - break; - FREE_LOCK(&lk); - } - if (indirdep) { - ACQUIRE_LOCK(&lk); + /* Found on the buffer worklist, no new structure to free. */ + if (indirdep != NULL && newindirdep == NULL) + return (indirdep); + if (indirdep != NULL && newindirdep != NULL) + panic("indirdep_lookup: simultaneous create"); + /* None found on the buffer and a new structure is ready. */ + if (indirdep == NULL && newindirdep != NULL) break; - } + /* None found and no new structure available. */ + FREE_LOCK(&lk); newindirdep = malloc(sizeof(struct indirdep), - M_INDIRDEP, M_SOFTDEP_FLAGS); + M_INDIRDEP, M_SOFTDEP_FLAGS); workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, mp); newindirdep->ir_state = ATTACHED; if (ip->i_ump->um_fstype == UFS1) newindirdep->ir_state |= UFS1FMT; + TAILQ_INIT(&newindirdep->ir_trunc); newindirdep->ir_saveddata = NULL; LIST_INIT(&newindirdep->ir_deplisthd); LIST_INIT(&newindirdep->ir_donehd); LIST_INIT(&newindirdep->ir_writehd); LIST_INIT(&newindirdep->ir_completehd); - LIST_INIT(&newindirdep->ir_jwork); - LIST_INIT(&newindirdep->ir_jnewblkhd); if (bp->b_blkno == bp->b_lblkno) { ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp, NULL, NULL); bp->b_blkno = blkno; } + newindirdep->ir_freeblks = NULL; newindirdep->ir_savebp = getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0, 0); + newindirdep->ir_bp = bp; BUF_KERNPROC(newindirdep->ir_savebp); bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount); ACQUIRE_LOCK(&lk); } + indirdep = newindirdep; + WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); + /* + * If the block is not yet allocated we don't set DEPCOMPLETE so + * that we don't free dependencies until the pointers are valid. + * This could search b_dep for D_ALLOCDIRECT/D_ALLOCINDIR rather + * than using the hash. + */ + if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk)) + LIST_INSERT_HEAD(&newblk->nb_indirdeps, indirdep, ir_next); + else + indirdep->ir_state |= DEPCOMPLETE; + return (indirdep); } /* + * Called to finish the allocation of the "aip" allocated + * by one of the two routines above. + */ +static struct freefrag * +setup_allocindir_phase2(bp, ip, inodedep, aip, lbn) + struct buf *bp; /* in-memory copy of the indirect block */ + struct inode *ip; /* inode for file being extended */ + struct inodedep *inodedep; /* Inodedep for ip */ + struct allocindir *aip; /* allocindir allocated by the above routines */ + ufs_lbn_t lbn; /* Logical block number for this block. */ +{ + struct fs *fs; + struct indirdep *indirdep; + struct allocindir *oldaip; + struct freefrag *freefrag; + struct mount *mp; + + mtx_assert(&lk, MA_OWNED); + mp = UFSTOVFS(ip->i_ump); + fs = ip->i_fs; + if (bp->b_lblkno >= 0) + panic("setup_allocindir_phase2: not indir blk"); + KASSERT(aip->ai_offset >= 0 && aip->ai_offset < NINDIR(fs), + ("setup_allocindir_phase2: Bad offset %d", aip->ai_offset)); + indirdep = indirdep_lookup(mp, ip, bp); + KASSERT(indirdep->ir_savebp != NULL, + ("setup_allocindir_phase2 NULL ir_savebp")); + aip->ai_indirdep = indirdep; + /* + * Check for an unwritten dependency for this indirect offset. If + * there is, merge the old dependency into the new one. This happens + * as a result of reallocblk only. + */ + freefrag = NULL; + if (aip->ai_oldblkno != 0) { + LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next) { + if (oldaip->ai_offset == aip->ai_offset) { + freefrag = allocindir_merge(aip, oldaip); + goto done; + } + } + LIST_FOREACH(oldaip, &indirdep->ir_donehd, ai_next) { + if (oldaip->ai_offset == aip->ai_offset) { + freefrag = allocindir_merge(aip, oldaip); + goto done; + } + } + } +done: + LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); + return (freefrag); +} + +/* * Merge two allocindirs which refer to the same block. Move newblock * dependencies and setup the freefrags appropriately. */ @@ -5363,7 +5503,7 @@ setup_freedirect(freeblks, ip, i, needj) DIP_SET(ip, i_db[i], 0); frags = sblksize(ip->i_fs, ip->i_size, i); frags = numfrags(ip->i_fs, frags); - newfreework(ip->i_ump, freeblks, NULL, i, blkno, frags, needj); + newfreework(ip->i_ump, freeblks, NULL, i, blkno, frags, 0, needj); } static inline void @@ -5382,15 +5522,15 @@ setup_freeext(freeblks, ip, i, needj) ip->i_din2->di_extb[i] = 0; frags = sblksize(ip->i_fs, ip->i_din2->di_extsize, i); frags = numfrags(ip->i_fs, frags); - newfreework(ip->i_ump, freeblks, NULL, -1 - i, blkno, frags, needj); + newfreework(ip->i_ump, freeblks, NULL, -1 - i, blkno, frags, 0, needj); } static inline void setup_freeindir(freeblks, ip, i, lbn, needj) struct freeblks *freeblks; struct inode *ip; + int i; ufs_lbn_t lbn; - int i; int needj; { ufs2_daddr_t blkno; @@ -5400,7 +5540,7 @@ setup_freeindir(freeblks, ip, i, lbn, needj) return; DIP_SET(ip, i_ib[i], 0); newfreework(ip->i_ump, freeblks, NULL, lbn, blkno, ip->i_fs->fs_frag, - needj); + 0, needj); } static inline struct freeblks * @@ -5415,16 +5555,529 @@ newfreeblks(mp, ip) workitem_alloc(&freeblks->fb_list, D_FREEBLKS, mp); LIST_INIT(&freeblks->fb_jfreeblkhd); LIST_INIT(&freeblks->fb_jwork); + freeblks->fb_ref = 1; + freeblks->fb_jtrunc = NULL; freeblks->fb_state = ATTACHED; freeblks->fb_uid = ip->i_uid; - freeblks->fb_previousinum = ip->i_number; + freeblks->fb_inum = ip->i_number; + freeblks->fb_modrev = DIP(ip, i_modrev); freeblks->fb_devvp = ip->i_devvp; freeblks->fb_chkcnt = 0; + freeblks->fb_freecnt = 0; + freeblks->fb_len = 0; return (freeblks); } +static void +trunc_indirdep(indirdep, freeblks, bp, off) + struct indirdep *indirdep; + struct freeblks *freeblks; + struct buf *bp; + int off; +{ + struct allocindir *aip, *aipn; + + /* + * The first set of allocindirs won't be in savedbp. + */ + LIST_FOREACH_SAFE(aip, &indirdep->ir_deplisthd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, bp, freeblks, 1); + LIST_FOREACH_SAFE(aip, &indirdep->ir_donehd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, bp, freeblks, 1); + /* + * These will exist in savedbp. + */ + LIST_FOREACH_SAFE(aip, &indirdep->ir_writehd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, NULL, freeblks, 0); + LIST_FOREACH_SAFE(aip, &indirdep->ir_completehd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, NULL, freeblks, 0); +} + /* + * Follow the chain of indirects down to lastlbn creating a freework + * structure for each. This will be used to start indir_trunc() at + * the right offset and create the journal records for the parrtial + * truncation. A second step will handle the truncated dependencies. + */ +static int +setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno, needj) + struct freeblks *freeblks; + struct inode *ip; + ufs_lbn_t lbn; + ufs_lbn_t lastlbn; + ufs2_daddr_t blkno; + int needj; +{ + struct indirdep *indirdep; + struct indirdep *indirn; + struct freework *freework; + struct newblk *newblk; + struct mount *mp; + struct buf *bp; + uint8_t *start; + uint8_t *end; + ufs_lbn_t lbnadd; + int level; + int error; + int off; + + + freework = NULL; + if (blkno == 0) + return (0); + mp = freeblks->fb_list.wk_mp; + bp = getblk(ITOV(ip), lbn, mp->mnt_stat.f_iosize, 0, 0, 0); + if ((bp->b_flags & B_CACHE) == 0) { + bp->b_blkno = blkptrtodb(VFSTOUFS(mp), blkno); + bp->b_iocmd = BIO_READ; + bp->b_flags &= ~B_INVAL; + bp->b_ioflags &= ~BIO_ERROR; + vfs_busy_pages(bp, 0); + bp->b_iooffset = dbtob(bp->b_blkno); + bstrategy(bp); + curthread->td_ru.ru_inblock++; + error = bufwait(bp); + if (error) { + brelse(bp); + return (error); + } + } + level = lbn_level(lbn); + lbnadd = lbn_offset(ip->i_fs, level); + /* + * Compute the offset of the last block we want to keep. Store + * in the freework the first block we want to completely free. + */ + off = (lastlbn - -(lbn + level)) / lbnadd; + if (off + 1 == NINDIR(ip->i_fs)) + goto nowork; + freework = newfreework(ip->i_ump, freeblks, NULL, lbn, blkno, 0, off+1, + needj); + /* + * Link the freework into the indirdep. This will prevent any new + * allocations from proceeding until we are finished with the + * truncate and the block is written. + */ + ACQUIRE_LOCK(&lk); + indirdep = indirdep_lookup(mp, ip, bp); + TAILQ_INSERT_TAIL(&indirdep->ir_trunc, freework, fw_next); + freework->fw_indir = indirdep; + /* + * Cancel any allocindirs that will not make it to disk. + * We have to do this for all copies of the indirdep that + * live on this newblk. + */ + if ((indirdep->ir_state & DEPCOMPLETE) == 0) { + newblk_lookup(mp, dbtofsb(ip->i_fs, bp->b_blkno), 0, &newblk); + LIST_FOREACH(indirn, &newblk->nb_indirdeps, ir_next) + trunc_indirdep(indirn, freeblks, bp, off); + } else + trunc_indirdep(indirdep, freeblks, bp, off); + FREE_LOCK(&lk); + /* + * Creation is protected by the buf lock. The saveddata is only + * needed if a full truncation follows a partial truncation but it + * is difficult to allocate in that case so we fetch it anyway. + */ + if (indirdep->ir_saveddata == NULL) + indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, + M_SOFTDEP_FLAGS); +nowork: + /* Fetch the blkno of the child and the zero start offset. */ + if (ip->i_ump->um_fstype == UFS1) { + blkno = ((ufs1_daddr_t *)bp->b_data)[off]; + start = (uint8_t *)&((ufs1_daddr_t *)bp->b_data)[off+1]; + } else { + blkno = ((ufs2_daddr_t *)bp->b_data)[off]; + start = (uint8_t *)&((ufs2_daddr_t *)bp->b_data)[off+1]; + } + if (freework) { + /* Zero the truncated pointers. */ + end = bp->b_data + bp->b_bcount; + bzero(start, end - start); + bdwrite(bp); + } else + bqrelse(bp); + if (level == 0) + return (0); + lbn++; /* adjust level */ + lbn -= (off * lbnadd); + return setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno, needj); +} + +/* + * Complete the partial truncation of an indirect block setup by + * setup_trunc_indir(). This zeros the truncated pointers in the saved + * copy and writes them to disk before the freeblks is allowed to complete. + */ +static void +complete_trunc_indir(freework) + struct freework *freework; +{ + struct indirdep *indirdep; + struct buf *bp; + uintptr_t start; + int count; + + indirdep = freework->fw_indir; + freework->fw_state |= DEPCOMPLETE; + do { + bp = indirdep->ir_bp; + /* See if the block was discarded. */ + if (bp == NULL) + break; + } while ((bp = getdirtybuf(bp, &lk, MNT_WAIT)) == NULL); + TAILQ_REMOVE(&indirdep->ir_trunc, freework, fw_next); + /* + * Zero the pointers in the saved copy. + */ + if (indirdep->ir_state & UFS1FMT) + start = sizeof(ufs1_daddr_t); + else + start = sizeof(ufs2_daddr_t); + start *= freework->fw_start; + count = indirdep->ir_savebp->b_bcount - start; + start += (uintptr_t)indirdep->ir_savebp->b_data; + bzero((char *)start, count); + /* + * If bp is NULL the block was fully truncated, restore the saved + * block list otherwise free it if it is no longer needed. + */ + if (TAILQ_EMPTY(&indirdep->ir_trunc)) { + if (bp == NULL) + bcopy(indirdep->ir_saveddata, + indirdep->ir_savebp->b_data, + indirdep->ir_savebp->b_bcount); + free(indirdep->ir_saveddata, M_INDIRDEP); + indirdep->ir_saveddata = NULL; + } + /* + * When bp is NULL there is a full truncation pending. We + * must wait for this full truncation to be journaled before + * we can release this freework because the disk pointers will + * never be written as zero. + */ + if (bp == NULL) { + if (LIST_EMPTY(&indirdep->ir_freeblks->fb_jfreeblkhd)) { + handle_written_freework(freework); + return; + } + freework->fw_state |= ONDEPLIST; + WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd, + &freework->fw_list); + return; + } + /* Complete when the real copy is written. */ + WORKLIST_INSERT(&bp->b_dep, &freework->fw_list); + FREE_LOCK(&lk); + bqrelse(bp); + ACQUIRE_LOCK(&lk); + return; +} + +/* + * Handle freeblocks for journaled softupdate filesystems. + * + * Contrary to normal softupdates, we must preserve the block pointers in + * indirects until their subordinates are free. This is to avoid journaling + * every block that is freed which may consume more space than the journal + * itself. The recovery program will see the free block journals at the + * base of the truncated area and traverse them to reclaim space. The + * pointers in the inode may be cleared immediately after the journal + * records are written because each direct and indirect pointer in the + * inode is recorded in a journal. This permits full truncation to proceed + * asynchronously. The write order is journal -> inode -> cgs -> indirects. + * + * The algorithm is as follows: + * 1) Traverse the in-memory state and create journal entries to release + * the relevant blocks and full indirect trees. + * 2) Traverse the indirect block chain adding partial truncation freework + * records to indirects in the path to lastlbn. The freework will + * prevent new allocation dependencies from being satisfied in this + * indirect until the truncation completes. + * 3) Read and lock the inode block, performing an update with the new size + * and pointers. This prevents truncated data from becoming valid on + * disk through step 4. + * 4) Reap unsatisfied dependencies that are beyond the truncated area, + * eliminate journal work for those records that do not require it. + * 5) Schedule the journal records to be written followed by the inode block. + * 6) Allocate any necessary frags for the end of file. + * 7) Zero any partially truncated blocks. + * + * From this truncation proceeds asynchronously using the freework and + * indir_trunc machinery. The file will not be extended again into a + * partially truncated indirect block until all work is completed but + * the normal dependency mechanism ensures that it is rolled back/forward + * as appropriate. Further truncation may occur without delay and is + * serialized in indir_trunc(). + */ +void +softdep_journal_freeblocks(ip, cred, length, flags) + struct inode *ip; /* The inode whose length is to be reduced */ + struct ucred *cred; + off_t length; /* The new length for the file */ + int flags; /* IO_EXT and/or IO_NORMAL */ +{ + struct freeblks *freeblks, *fbn; + struct inodedep *inodedep; + struct jfreeblk *jfreeblk; + struct allocdirect *adp, *adpn; + struct fs *fs; + struct buf *bp; + struct vnode *vp; + ufs2_daddr_t extblocks, datablocks; + struct mount *mp; + ufs_lbn_t tmpval, lbn, lastlbn, partlbn; + off_t osize; + int frags; + int lastoff, iboff; + int allocblock; + int error, i; + int partlvl; + int needj; + + fs = ip->i_fs; + mp = UFSTOVFS(ip->i_ump); + vp = ITOV(ip); + needj = 1; + lastoff = 0; + iboff = -1; + allocblock = 0; + extblocks = 0; + datablocks = 0; + partlbn = 0; + partlvl = 0; + osize = 0; + frags = 0; + freeblks = newfreeblks(mp, ip); + ACQUIRE_LOCK(&lk); + /* + * If we're truncating a removed file that will never be written + * we don't need to journal the block frees. The canceled journals + * for the allocations will suffice. + */ + inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED && + length == 0) + needj = 0; + FREE_LOCK(&lk); + /* + * Calculate the lbn that we are truncating to. This results in -1 + * if we're truncating the 0 bytes. So it is the last lbn we want + * to keep, not the first lbn we want to truncate. + */ + lastlbn = lblkno(fs, length + fs->fs_bsize - 1) - 1; + /* + * Compute frags we are keeping in lastlbn. 0 means all. + */ + if (lastlbn >= 0 && lastlbn < NDADDR) { + lastoff = blkoff(fs, length); + frags = fragroundup(fs, lastoff); + /* adp offset of last valid allocdirect. */ + iboff = lastlbn; + } + if (fs->fs_magic == FS_UFS2_MAGIC) + extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); + /* + * Handle normal data blocks and indirects. This section saves + * values used after the inode update to complete frag and indirect + * truncation. + */ + if ((flags & IO_NORMAL) != 0) { + /* + * Handle truncation of whole direct and indirect blocks. + */ + for (i = 0; i < NDADDR; i++) + if (i > lastlbn) + setup_freedirect(freeblks, ip, i, needj); + for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; + i++, lbn += tmpval, tmpval *= NINDIR(fs)) { + /* Release a whole indirect tree. */ + if (lbn > lastlbn) { + setup_freeindir(freeblks, ip, i, -lbn -i, + needj); + continue; + } + iboff = i + NDADDR; + /* + * Record the level and lbn for a indir needing + * partial truncation. + */ + if (lbn <= lastlbn && lbn + tmpval - 1 > lastlbn) { + partlbn = -lbn - i; + partlvl = i; + } + } + /* + * Traverse partially truncated indirect tree. + */ + if (partlbn) + setup_trunc_indir(freeblks, ip, partlbn, + lastlbn, DIP(ip, i_ib[partlvl]), needj); + osize = ip->i_size; + ip->i_size = length; + /* + * Handle partial truncation to a frag boundary. + */ + if (lastoff) { + ufs2_daddr_t blkno; + long oldfrags; + + oldfrags = blksize(fs, ip, lastlbn); + blkno = DIP(ip, i_db[lastlbn]); + if (blkno && oldfrags != frags) { + oldfrags -= frags; + oldfrags = numfrags(ip->i_fs, oldfrags); + blkno += numfrags(ip->i_fs, frags); + newfreework(ip->i_ump, freeblks, NULL, lastlbn, + blkno, oldfrags, 0, needj); + } else if (blkno == 0) { + ip->i_size = length - lastoff; + allocblock = 1; + } + } + DIP_SET(ip, i_size, ip->i_size); + if (length == 0) + datablocks = DIP(ip, i_blocks) - extblocks; + freeblks->fb_len = length; + } + if ((flags & IO_EXT) != 0) { + for (i = 0; i < NXADDR; i++) + setup_freeext(freeblks, ip, i, needj); + ip->i_din2->di_extsize = 0; + datablocks += extblocks; + } + freeblks->fb_chkcnt = datablocks; + UFS_LOCK(ip->i_ump); + fs->fs_pendingblocks += datablocks; + UFS_UNLOCK(ip->i_ump); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks); + /* + * Handle truncation of incomplete alloc direct dependencies. We + * hold the inode block locked to prevent incomplete dependencies + * from reaching the disk while we are eliminating those that + * have been truncated. This is a partially inlined ffs_update(). + */ + ufs_itimes(vp); + ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); + error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, cred, &bp); + if (error) { + brelse(bp); + softdep_error("softdep_journal_freeblocks", error); + return; + } + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + softdep_update_inodeblock(ip, bp, 0); + if (ip->i_ump->um_fstype == UFS1) + *((struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; + else + *((struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; + ACQUIRE_LOCK(&lk); + (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + if ((inodedep->id_state & IOSTARTED) != 0) + panic("softdep_setup_freeblocks: inode busy"); + /* + * Add the freeblks structure to the list of operations that + * must await the zero'ed inode being written to disk. If we + * still have a bitmap dependency (needj), then the inode + * has never been written to disk, so we can process the + * freeblks below once we have deleted the dependencies. + */ + if (needj) + WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list); + else + freeblks->fb_state |= COMPLETE; + if (flags & IO_NORMAL) { + TAILQ_FOREACH_SAFE(adp, &inodedep->id_inoupdt, ad_next, adpn) { + if (adp->ad_offset > iboff) + cancel_allocdirect(&inodedep->id_inoupdt, adp, + freeblks); + /* + * Truncate the allocdirect. We could eliminate + * or modify journal records as well. + */ + else if (adp->ad_offset == iboff && lastoff) + adp->ad_newsize = frags; + } + } + if (flags & IO_EXT) + while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0) + cancel_allocdirect(&inodedep->id_extupdt, adp, + freeblks); + /* + * Add journal work. + */ + LIST_FOREACH(jfreeblk, &freeblks->fb_jfreeblkhd, jf_deps) + add_to_journal(&jfreeblk->jf_list); + FREE_LOCK(&lk); + bdwrite(bp); + /* + * Truncate dependency structures beyond length. + */ + trunc_dependencies(ip, freeblks, lastlbn, frags, flags); + /* + * This is only set when we need to allocate a fragment because + * none existed at the end of a frag-sized file. It handles only + * allocating a new, zero filled block. + */ + if (allocblock) { + error = UFS_BALLOC(vp, length - 1, 1, cred, BA_CLRBUF, &bp); + if (error != 0) { + softdep_error("softdep_journal_freeblks", error); + return; + } + ip->i_size = length; + DIP_SET(ip, i_size, length); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + allocbuf(bp, frags); + bawrite(bp); + /* + * Zero the end of a truncated frag or block. + */ + } else if (lastoff != 0 && vp->v_type != VDIR) { + int size; + + size = sblksize(fs, osize, lastlbn); + error = bread(vp, lastlbn, size, cred, &bp); + if (error) { + softdep_error("softdep_journal_freeblks", error); + return; + } + bzero((char *)bp->b_data + lastoff, size - lastoff); + size = sblksize(fs, length, lastlbn); + allocbuf(bp, size); + bawrite(bp); + + } + ACQUIRE_LOCK(&lk); + inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + TAILQ_INSERT_TAIL(&inodedep->id_freeblklst, freeblks, fb_next); + freeblks->fb_state |= DEPCOMPLETE | ONDEPLIST; + /* + * We zero earlier truncations so they don't erroneously + * update i_blocks. + */ + if (freeblks->fb_len == 0) + TAILQ_FOREACH(fbn, &inodedep->id_freeblklst, fb_next) + fbn->fb_len = 0; + if ((freeblks->fb_state & ALLCOMPLETE) != ALLCOMPLETE || + !LIST_EMPTY(&freeblks->fb_jfreeblkhd)) + freeblks = NULL; + FREE_LOCK(&lk); + if (freeblks) + handle_workitem_freeblocks(freeblks, 0); +} + +/* * Block de-allocation dependencies. * * When blocks are de-allocated, the on-disk pointers must be nullified before @@ -5464,7 +6117,6 @@ softdep_setup_freeblocks(ip, length, flags) struct freeblks *freeblks; struct inodedep *inodedep; struct allocdirect *adp; - struct jfreeblk *jfreeblk; struct buf *bp; struct fs *fs; ufs2_daddr_t extblocks, datablocks; @@ -5472,52 +6124,37 @@ softdep_setup_freeblocks(ip, length, flags) int i, delay, error; ufs_lbn_t tmpval; ufs_lbn_t lbn; - int needj; fs = ip->i_fs; mp = UFSTOVFS(ip->i_ump); if (length != 0) panic("softdep_setup_freeblocks: non-zero length"); freeblks = newfreeblks(mp, ip); - ACQUIRE_LOCK(&lk); - /* - * If we're truncating a removed file that will never be written - * we don't need to journal the block frees. The canceled journals - * for the allocations will suffice. - */ - inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); - if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED || - (fs->fs_flags & FS_SUJ) == 0) - needj = 0; - else - needj = 1; - FREE_LOCK(&lk); extblocks = 0; + datablocks = 0; if (fs->fs_magic == FS_UFS2_MAGIC) extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); - datablocks = DIP(ip, i_blocks) - extblocks; if ((flags & IO_NORMAL) != 0) { for (i = 0; i < NDADDR; i++) - setup_freedirect(freeblks, ip, i, needj); + setup_freedirect(freeblks, ip, i, 0); for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, lbn += tmpval, tmpval *= NINDIR(fs)) - setup_freeindir(freeblks, ip, i, -lbn -i, needj); + setup_freeindir(freeblks, ip, i, -lbn -i, 0); ip->i_size = 0; DIP_SET(ip, i_size, 0); - freeblks->fb_chkcnt = datablocks; - UFS_LOCK(ip->i_ump); - fs->fs_pendingblocks += datablocks; - UFS_UNLOCK(ip->i_ump); + datablocks = DIP(ip, i_blocks) - extblocks; } if ((flags & IO_EXT) != 0) { for (i = 0; i < NXADDR; i++) - setup_freeext(freeblks, ip, i, needj); + setup_freeext(freeblks, ip, i, 0); ip->i_din2->di_extsize = 0; - freeblks->fb_chkcnt += extblocks; + datablocks += extblocks; } - if (LIST_EMPTY(&freeblks->fb_jfreeblkhd)) - needj = 0; - DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - freeblks->fb_chkcnt); + freeblks->fb_chkcnt = datablocks; + UFS_LOCK(ip->i_ump); + fs->fs_pendingblocks += datablocks; + UFS_UNLOCK(ip->i_ump); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks); /* * Push the zero'ed inode to to its disk buffer so that we are free * to delete its dependencies below. Once the dependencies are gone @@ -5557,8 +6194,6 @@ softdep_setup_freeblocks(ip, length, flags) delay = (inodedep->id_state & DEPCOMPLETE); if (delay) WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list); - else if (needj) - freeblks->fb_state |= COMPLETE; /* * Because the file length has been truncated to zero, any * pending block allocation dependency structures associated @@ -5573,27 +6208,22 @@ softdep_setup_freeblocks(ip, length, flags) &inodedep->id_inoupdt); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) cancel_allocdirect(&inodedep->id_inoupdt, adp, - freeblks, delay); + freeblks); } if (flags & IO_EXT) { merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt); while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0) cancel_allocdirect(&inodedep->id_extupdt, adp, - freeblks, delay); + freeblks); } - LIST_FOREACH(jfreeblk, &freeblks->fb_jfreeblkhd, jf_deps) - add_to_journal(&jfreeblk->jf_list); - FREE_LOCK(&lk); bdwrite(bp); - softdep_trunc_deps(ITOV(ip), freeblks, 0, 0, flags); + trunc_dependencies(ip, freeblks, -1, 0, flags); ACQUIRE_LOCK(&lk); if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); - - if (delay || needj) - freeblks->fb_state |= DEPCOMPLETE; + freeblks->fb_state |= DEPCOMPLETE; if (delay) { /* * If the inode with zeroed block pointers is now on disk @@ -5604,17 +6234,15 @@ softdep_setup_freeblocks(ip, length, flags) */ if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) add_to_worklist(&freeblks->fb_list, 1); - } - if (needj && LIST_EMPTY(&freeblks->fb_jfreeblkhd)) - needj = 0; - + } else + freeblks->fb_state |= COMPLETE; FREE_LOCK(&lk); /* - * If the inode has never been written to disk (delay == 0) and - * we're not waiting on any journal writes, then we can process the - * freeblks now that we have deleted the dependencies. + * If the inode has never been written to disk (delay == 0) then + * we can process the freeblks now that we have deleted the + * dependencies. */ - if (!delay && !needj) + if (!delay) handle_workitem_freeblocks(freeblks, 0); } @@ -5622,18 +6250,19 @@ softdep_setup_freeblocks(ip, length, flags) * Eliminate any dependencies that exist in memory beyond lblkno:off */ static void -softdep_trunc_deps(vp, freeblks, lblkno, off, flags) - struct vnode *vp; +trunc_dependencies(ip, freeblks, lastlbn, lastoff, flags) + struct inode *ip; struct freeblks *freeblks; - ufs_lbn_t lblkno; - int off; + ufs_lbn_t lastlbn; + int lastoff; int flags; { - struct inodedep *inodedep; struct bufobj *bo; + struct vnode *vp; struct buf *bp; - struct mount *mp; - ino_t ino; + struct fs *fs; + ufs_lbn_t lbn; + int blkoff; /* * We must wait for any I/O in progress to finish so that @@ -5641,26 +6270,48 @@ static void * Once they are all there, walk the list and get rid of * any dependencies. */ - ino = VTOI(vp)->i_number; - mp = vp->v_mount; + fs = ip->i_fs; + vp = ITOV(ip); bo = &vp->v_bufobj; BO_LOCK(bo); drain_output(vp); + TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) + bp->b_vflags &= ~BV_SCANNED; restart: TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) { if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) || ((flags & IO_NORMAL) == 0 && (bp->b_xflags & BX_ALTDATA) == 0)) continue; + if (bp->b_vflags & BV_SCANNED) + continue; + blkoff = 0; + /* + * If this is a partial truncate we only want those + * blocks and indirect blocks that cover the range + * we're after. We handle the partially de-allocated + * indirects in a different pass. + */ + if (lastlbn > -1 && (bp->b_xflags & BX_ALTDATA) == 0) { + lbn = bp->b_lblkno; + if (lbn < 0) + lbn = -(lbn + lbn_level(lbn)); + if (lbn < lastlbn) + continue; + /* Here we only truncate lblkno if it's partial. */ + if (lbn == lastlbn) { + if (lastoff == 0) + continue; + blkoff = lastoff; + } + } if ((bp = getdirtybuf(bp, BO_MTX(bo), MNT_WAIT)) == NULL) goto restart; BO_UNLOCK(bo); - ACQUIRE_LOCK(&lk); - (void) inodedep_lookup(mp, ino, 0, &inodedep); - if (deallocate_dependencies(bp, inodedep, freeblks, 0)) - bp->b_flags |= B_INVAL | B_NOCACHE; - FREE_LOCK(&lk); - brelse(bp); + if (deallocate_dependencies(bp, freeblks, blkoff)) + bqrelse(bp); + else + brelse(bp); BO_LOCK(bo); goto restart; } @@ -5668,69 +6319,66 @@ restart: } static int -cancel_pagedep(pagedep, inodedep, freeblks) +cancel_pagedep(pagedep, freeblks, blkoff) struct pagedep *pagedep; - struct inodedep *inodedep; struct freeblks *freeblks; + int blkoff; { - struct newdirblk *newdirblk; struct jremref *jremref; struct jmvref *jmvref; - struct dirrem *dirrem; + struct dirrem *dirrem, *tmp; int i; /* - * There should be no directory add dependencies present - * as the directory could not be truncated until all - * children were removed. - */ - KASSERT(LIST_FIRST(&pagedep->pd_pendinghd) == NULL, - ("deallocate_dependencies: pendinghd != NULL")); - for (i = 0; i < DAHASHSZ; i++) - KASSERT(LIST_FIRST(&pagedep->pd_diraddhd[i]) == NULL, - ("deallocate_dependencies: diraddhd != NULL")); - /* * Copy any directory remove dependencies to the list - * to be processed after the zero'ed inode is written. - * If the inode has already been written, then they + * to be processed after the freeblks proceeds. If + * directory entry never made it to disk they * can be dumped directly onto the work list. */ - LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) { + LIST_FOREACH_SAFE(dirrem, &pagedep->pd_dirremhd, dm_next, tmp) { + /* Skip this directory removal if it is intended to remain. */ + if (dirrem->dm_offset < blkoff) + continue; /* - * If there are any dirrems we wait for - * the journal write to complete and - * then restart the buf scan as the lock + * If there are any dirrems we wait for the journal write + * to complete and then restart the buf scan as the lock * has been dropped. */ - while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) - != NULL) { + while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) { stat_jwait_filepage++; jwait(&jremref->jr_list); return (ERESTART); } LIST_REMOVE(dirrem, dm_next); dirrem->dm_dirinum = pagedep->pd_ino; - if (inodedep == NULL || - (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { - dirrem->dm_state |= COMPLETE; - add_to_worklist(&dirrem->dm_list, 0); - } else - WORKLIST_INSERT(&inodedep->id_bufwait, - &dirrem->dm_list); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &dirrem->dm_list); } - if ((pagedep->pd_state & NEWBLOCK) != 0) { - newdirblk = pagedep->pd_newdirblk; - WORKLIST_REMOVE(&newdirblk->db_list); - free_newdirblk(newdirblk); - } while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) { stat_jwait_filepage++; jwait(&jmvref->jm_list); return (ERESTART); } - WORKLIST_REMOVE(&pagedep->pd_list); - LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); + /* + * When we're partially truncating a pagedep we just want to flush + * journal entries and return. There can not be any adds in the + * truncated portion of the directory and newblk must remain if + * part of the block remains. + */ + if (blkoff != 0) + return (0); + /* + * There should be no directory add dependencies present + * as the directory could not be truncated until all + * children were removed. + */ + KASSERT(LIST_FIRST(&pagedep->pd_pendinghd) == NULL, + ("deallocate_dependencies: pendinghd != NULL")); + for (i = 0; i < DAHASHSZ; i++) + KASSERT(LIST_FIRST(&pagedep->pd_diraddhd[i]) == NULL, + ("deallocate_dependencies: diraddhd != NULL")); + if ((pagedep->pd_state & NEWBLOCK) != 0) + free_newdirblk(pagedep->pd_newdirblk); + free_pagedep(pagedep); return (0); } @@ -5739,58 +6387,81 @@ static int * be reallocated to a new vnode. The buffer must be locked, thus, * no I/O completion operations can occur while we are manipulating * its associated dependencies. The mutex is held so that other I/O's - * associated with related dependencies do not occur. Returns 1 if - * all dependencies were cleared, 0 otherwise. + * associated with related dependencies do not occur. */ static int -deallocate_dependencies(bp, inodedep, freeblks, off) +deallocate_dependencies(bp, freeblks, off) struct buf *bp; - struct inodedep *inodedep; struct freeblks *freeblks; int off; { - struct worklist *wk; struct indirdep *indirdep; - struct allocindir *aip; struct pagedep *pagedep; + struct allocdirect *adp; + struct worklist *wk, *wkn; - mtx_assert(&lk, MA_OWNED); - while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { + ACQUIRE_LOCK(&lk); + LIST_FOREACH_SAFE(wk, &bp->b_dep, wk_list, wkn) { switch (wk->wk_type) { - case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); if (bp->b_lblkno >= 0 || bp->b_blkno != indirdep->ir_savebp->b_lblkno) panic("deallocate_dependencies: not indir"); - cancel_indirdep(indirdep, bp, inodedep, freeblks); + cancel_indirdep(indirdep, bp, freeblks); continue; case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); - if (cancel_pagedep(pagedep, inodedep, freeblks)) - return (0); + if (cancel_pagedep(pagedep, freeblks, off)) { + FREE_LOCK(&lk); + return (ERESTART); + } continue; case D_ALLOCINDIR: - aip = WK_ALLOCINDIR(wk); - cancel_allocindir(aip, inodedep, freeblks); + /* + * Simply remove the allocindir, we'll find it via + * the indirdep where we can clear pointers if + * needed. + */ + WORKLIST_REMOVE(wk); continue; + case D_FREEWORK: + /* + * A truncation is waiting for the zero'd pointers + * to be written. It can be freed when the freeblks + * is journaled. + */ + WORKLIST_REMOVE(wk); + wk->wk_state |= ONDEPLIST; + WORKLIST_INSERT(&freeblks->fb_freeworkhd, wk); + break; + case D_ALLOCDIRECT: - case D_INODEDEP: + adp = WK_ALLOCDIRECT(wk); + if (off != 0) + continue; + /* FALLTHROUGH */ + default: panic("deallocate_dependencies: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ - - default: - panic("deallocate_dependencies: Unknown type %s", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ } } + FREE_LOCK(&lk); + /* + * Don't throw away this buf, we were partially truncating and + * some deps will always remain. + */ + if (off) { + bp->b_vflags |= BV_SCANNED; + return (EBUSY); + } + bp->b_flags |= B_INVAL | B_NOCACHE; - return (1); + return (0); } /* @@ -5800,20 +6471,36 @@ static int * space is no longer pointed to by the inode or in the bitmap. */ static void -cancel_allocdirect(adphead, adp, freeblks, delay) +cancel_allocdirect(adphead, adp, freeblks) struct allocdirectlst *adphead; struct allocdirect *adp; struct freeblks *freeblks; - int delay; { struct freework *freework; struct newblk *newblk; struct worklist *wk; - ufs_lbn_t lbn; TAILQ_REMOVE(adphead, adp, ad_next); newblk = (struct newblk *)adp; + freework = NULL; /* + * Find the correct freework structure. + */ + LIST_FOREACH(wk, &freeblks->fb_freeworkhd, wk_list) { + if (wk->wk_type != D_FREEWORK) + continue; + freework = WK_FREEWORK(wk); + if (freework->fw_blkno == newblk->nb_newblkno) + break; + } + if (freework == NULL) + panic("cancel_allocdirect: Freework not found"); + /* + * If a newblk exists at all we still have the journal entry that + * initiated the allocation so we do not need to journal the free. + */ + cancel_jfreeblk(freeblks, freework->fw_blkno); + /* * If the journal hasn't been written the jnewblk must be passed * to the call to ffs_blkfree that reclaims the space. We accomplish * this by linking the journal dependency into the freework to be @@ -5821,33 +6508,9 @@ static void * been written we can simply reclaim the journal space when the * freeblks work is complete. */ - if (newblk->nb_jnewblk == NULL) { - if (cancel_newblk(newblk, NULL, &freeblks->fb_jwork) != NULL) - panic("cancel_allocdirect: Unexpected dependency"); - goto found; - } - lbn = newblk->nb_jnewblk->jn_lbn; - /* - * Find the correct freework structure so it releases the canceled - * journal when the bitmap is cleared. This preserves rollback - * until the allocation is reverted. - */ - LIST_FOREACH(wk, &freeblks->fb_freeworkhd, wk_list) { - freework = WK_FREEWORK(wk); - if (freework->fw_lbn != lbn) - continue; - freework->fw_jnewblk = cancel_newblk(newblk, &freework->fw_list, - &freework->fw_jwork); - goto found; - } - panic("cancel_allocdirect: Freework not found for lbn %jd\n", lbn); -found: - if (delay) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, - &newblk->nb_list); - else - free_newblk(newblk); - return; + freework->fw_jnewblk = cancel_newblk(newblk, &freework->fw_list, + &freeblks->fb_jwork); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &newblk->nb_list); } @@ -5865,33 +6528,18 @@ cancel_newblk(newblk, wk, wkhd) struct worklist *wk; struct workhead *wkhd; { - struct indirdep *indirdep; - struct allocindir *aip; struct jnewblk *jnewblk; - while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) { - indirdep->ir_state &= ~ONDEPLIST; - LIST_REMOVE(indirdep, ir_next); - /* - * If an indirdep is not on the buf worklist we need to - * free it here as deallocate_dependencies() will never - * find it. These pointers were never visible on disk and - * can be discarded immediately. - */ - while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) { - LIST_REMOVE(aip, ai_next); - if (cancel_newblk(&aip->ai_block, NULL, wkhd) != NULL) - panic("cancel_newblk: aip has journal entry"); - free_newblk(&aip->ai_block); - } - /* - * If this indirdep is not attached to a buf it was simply - * waiting on completion to clear completehd. free_indirdep() - * asserts that nothing is dangling. - */ - if ((indirdep->ir_state & ONWORKLIST) == 0) - free_indirdep(indirdep); - } + newblk->nb_state |= GOINGAWAY; + /* + * Previously we traversed the completedhd on each indirdep + * attached to this newblk to cancel them and gather journal + * work. Since we need only the oldest journal segment and + * the lowest point on the tree will always have the oldest + * journal segment we are free to release the segments + * of any subordinates and may leave the indirdep list to + * indirdep_complete() when this newblk is freed. + */ if (newblk->nb_state & ONDEPLIST) { newblk->nb_state &= ~ONDEPLIST; LIST_REMOVE(newblk, nb_deps); @@ -5904,7 +6552,7 @@ cancel_newblk(newblk, wk, wkhd) * superseding operation completes. */ jnewblk = newblk->nb_jnewblk; - if (jnewblk != NULL) { + if (jnewblk != NULL && wk != NULL) { newblk->nb_jnewblk = NULL; jnewblk->jn_dep = wk; } @@ -5915,6 +6563,25 @@ cancel_newblk(newblk, wk, wkhd) } /* + * Schedule the freefrag associated with a newblk to be released once + * the pointers are written and the previous block is no longer needed. + */ +static void +newblk_freefrag(newblk) + struct newblk *newblk; +{ + struct freefrag *freefrag; + + if (newblk->nb_freefrag == NULL) + return; + freefrag = newblk->nb_freefrag; + newblk->nb_freefrag = NULL; + freefrag->ff_state |= COMPLETE; + if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(&freefrag->ff_list, 0); +} + +/* * Free a newblk. Generate a new freefrag work request if appropriate. * This must be called after the inode pointer and any direct block pointers * are valid or fully removed via truncate or frag extension. @@ -5924,34 +6591,23 @@ free_newblk(newblk) struct newblk *newblk; { struct indirdep *indirdep; - struct newdirblk *newdirblk; - struct freefrag *freefrag; struct worklist *wk; + KASSERT(newblk->nb_jnewblk == NULL, + ("free_newblk; jnewblk %p still attached", newblk->nb_jnewblk)); mtx_assert(&lk, MA_OWNED); + newblk_freefrag(newblk); if (newblk->nb_state & ONDEPLIST) LIST_REMOVE(newblk, nb_deps); if (newblk->nb_state & ONWORKLIST) WORKLIST_REMOVE(&newblk->nb_list); LIST_REMOVE(newblk, nb_hash); - if ((freefrag = newblk->nb_freefrag) != NULL) { - freefrag->ff_state |= COMPLETE; - if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) - add_to_worklist(&freefrag->ff_list, 0); - } - if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) { - newdirblk = WK_NEWDIRBLK(wk); - WORKLIST_REMOVE(&newdirblk->db_list); - if (!LIST_EMPTY(&newblk->nb_newdirblk)) - panic("free_newblk: extra newdirblk"); - free_newdirblk(newdirblk); - } - while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) { - indirdep->ir_state |= DEPCOMPLETE; + if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) + free_newdirblk(WK_NEWDIRBLK(wk)); + if (!LIST_EMPTY(&newblk->nb_newdirblk)) + panic("free_newblk: extra newdirblk"); + while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) indirdep_complete(indirdep); - } - KASSERT(newblk->nb_jnewblk == NULL, - ("free_newblk; jnewblk %p still attached", newblk->nb_jnewblk)); handle_jwork(&newblk->nb_jwork); newblk->nb_list.wk_type = D_NEWBLK; WORKITEM_FREE(newblk, D_NEWBLK); @@ -5968,9 +6624,9 @@ free_newdirblk(newdirblk) struct pagedep *pagedep; struct diradd *dap; struct worklist *wk; - int i; mtx_assert(&lk, MA_OWNED); + WORKLIST_REMOVE(&newdirblk->db_list); /* * If the pagedep is still linked onto the directory buffer * dependency chain, then some of the entries on the @@ -5983,21 +6639,13 @@ free_newdirblk(newdirblk) */ pagedep = newdirblk->db_pagedep; pagedep->pd_state &= ~NEWBLOCK; - if ((pagedep->pd_state & ONWORKLIST) == 0) + if ((pagedep->pd_state & ONWORKLIST) == 0) { while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) free_diradd(dap, NULL); - /* - * If no dependencies remain, the pagedep will be freed. - */ - for (i = 0; i < DAHASHSZ; i++) - if (!LIST_EMPTY(&pagedep->pd_diraddhd[i])) - break; - if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0 && - LIST_EMPTY(&pagedep->pd_jmvrefhd)) { - KASSERT(LIST_FIRST(&pagedep->pd_dirremhd) == NULL, - ("free_newdirblk: Freeing non-free pagedep %p", pagedep)); - LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); + /* + * If no dependencies remain, the pagedep will be freed. + */ + free_pagedep(pagedep); } /* Should only ever be one item in the list. */ while ((wk = LIST_FIRST(&newdirblk->db_mkdir)) != NULL) { @@ -6020,6 +6668,7 @@ softdep_freefile(pvp, ino, mode) struct inode *ip = VTOI(pvp); struct inodedep *inodedep; struct freefile *freefile; + struct freeblks *freeblks; /* * This sets up the inode de-allocation dependency. @@ -6048,28 +6697,38 @@ softdep_freefile(pvp, ino, mode) */ ACQUIRE_LOCK(&lk); inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); - /* - * Remove this inode from the unlinked list and set - * GOINGAWAY as appropriate to indicate that this inode - * will never be written. - */ - if (inodedep && inodedep->id_state & UNLINKED) { + if (inodedep) { /* - * Save the journal work to be freed with the bitmap - * before we clear UNLINKED. Otherwise it can be lost - * if the inode block is written. + * Clear out freeblks that no longer need to reference + * this inode. */ - handle_bufwait(inodedep, &freefile->fx_jwork); - clear_unlinked_inodedep(inodedep); - /* Re-acquire inodedep as we've dropped lk. */ - inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); + while ((freeblks = + TAILQ_FIRST(&inodedep->id_freeblklst)) != NULL) { + TAILQ_REMOVE(&inodedep->id_freeblklst, freeblks, + fb_next); + freeblks->fb_state &= ~ONDEPLIST; + } + /* + * Remove this inode from the unlinked list. + */ + if (inodedep->id_state & UNLINKED) { + /* + * Save the journal work to be freed with the bitmap + * before we clear UNLINKED. Otherwise it can be lost + * if the inode block is written. + */ + handle_bufwait(inodedep, &freefile->fx_jwork); + clear_unlinked_inodedep(inodedep); + /* Re-acquire inodedep as we've dropped lk. */ + inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); + } } if (inodedep == NULL || check_inode_unwritten(inodedep)) { FREE_LOCK(&lk); handle_workitem_freefile(freefile); return; } - if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) + if ((inodedep->id_state & DEPCOMPLETE) == 0) inodedep->id_state |= GOINGAWAY; WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); FREE_LOCK(&lk); @@ -6154,6 +6813,7 @@ free_inodedep(inodedep) !TAILQ_EMPTY(&inodedep->id_newinoupdt) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || + !TAILQ_EMPTY(&inodedep->id_freeblklst) || inodedep->id_mkdiradd != NULL || inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL) @@ -6181,54 +6841,59 @@ freework_freeblock(freework) struct ufsmount *ump; struct workhead wkhd; struct fs *fs; - int pending; int bsize; int needj; + mtx_assert(&lk, MA_OWNED); + /* + * Handle partial truncate separately. + */ + if (freework->fw_indir) { + complete_trunc_indir(freework); + return; + } freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; needj = freeblks->fb_list.wk_mp->mnt_kern_flag & MNTK_SUJ; bsize = lfragtosize(fs, freework->fw_frags); - pending = btodb(bsize); LIST_INIT(&wkhd); /* + * DEPCOMPLETE is cleared in indirblk_insert() if the block lives + * on the indirblk hashtable and prevents premature freeing. + */ + freework->fw_state |= DEPCOMPLETE; + /* + * SUJ needs to wait for the segment referencing freed indirect + * blocks to expire so that we know the checker will not confuse + * a re-allocated indirect block with its old contents. + */ + if (needj && freework->fw_lbn <= -NDADDR) + indirblk_insert(freework); + /* * If we are canceling an existing jnewblk pass it to the free * routine, otherwise pass the freeblk which will ultimately * release the freeblks. If we're not journaling, we can just * free the freeblks immediately. */ - ACQUIRE_LOCK(&lk); - LIST_SWAP(&wkhd, &freework->fw_jwork, worklist, wk_list); jnewblk = freework->fw_jnewblk; if (jnewblk != NULL) { - /* Could've already been canceled in indir_trunc(). */ - if ((jnewblk->jn_state & GOINGAWAY) == 0) - cancel_jnewblk(jnewblk, &wkhd); + cancel_jnewblk(jnewblk, &wkhd); needj = 0; } else if (needj) WORKLIST_INSERT(&wkhd, &freework->fw_list); - freeblks->fb_chkcnt -= pending; + freeblks->fb_freecnt += btodb(bsize); FREE_LOCK(&lk); - /* - * extattr blocks don't show up in pending blocks. XXX why? - */ - if (freework->fw_lbn >= 0 || freework->fw_lbn <= -NDADDR) { - UFS_LOCK(ump); - fs->fs_pendingblocks -= pending; - UFS_UNLOCK(ump); - } - ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, - bsize, freeblks->fb_previousinum, &wkhd); + ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize, + freeblks->fb_inum, &wkhd); + ACQUIRE_LOCK(&lk); if (needj) return; /* * The jnewblk will be discarded and the bits in the map never * made it to disk. We can immediately free the freeblk. */ - ACQUIRE_LOCK(&lk); handle_written_freework(freework); - FREE_LOCK(&lk); } /* @@ -6246,15 +6911,21 @@ handle_workitem_indirblk(freework) struct ufsmount *ump; struct fs *fs; - freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; - if (freework->fw_off == NINDIR(fs)) + if (freework->fw_state & DEPCOMPLETE) { + handle_written_freework(freework); + return; + } + if (freework->fw_off == NINDIR(fs)) { freework_freeblock(freework); - else - indir_trunc(freework, fsbtodb(fs, freework->fw_blkno), - freework->fw_lbn); + return; + } + FREE_LOCK(&lk); + indir_trunc(freework, fsbtodb(fs, freework->fw_blkno), + freework->fw_lbn); + ACQUIRE_LOCK(&lk); } /* @@ -6268,48 +6939,17 @@ handle_written_freework(freework) { struct freeblks *freeblks; struct freework *parent; - struct jsegdep *jsegdep; - struct worklist *wk; - int needj; - needj = 0; freeblks = freework->fw_freeblks; parent = freework->fw_parent; - /* - * SUJ needs to wait for the segment referencing freed indirect - * blocks to expire so that we know the checker will not confuse - * a re-allocated indirect block with its old contents. - */ - if (freework->fw_lbn <= -NDADDR && - freework->fw_list.wk_mp->mnt_kern_flag & MNTK_SUJ) { - LIST_FOREACH(wk, &freeblks->fb_jwork, wk_list) - if (wk->wk_type == D_JSEGDEP) - break; - if (wk) { - jsegdep = WK_JSEGDEP(wk); - LIST_INSERT_HEAD(&jsegdep->jd_seg->js_indirs, - freework, fw_next); - WORKLIST_INSERT(INDIR_HASH(freework->fw_list.wk_mp, - freework->fw_blkno), &freework->fw_list); - needj = 1; - } - } if (parent) { - if (--parent->fw_ref != 0) - parent = NULL; - freeblks = NULL; - } else if (--freeblks->fb_ref != 0) - freeblks = NULL; - if (needj == 0) + if (--parent->fw_ref == 0) + add_to_worklist(&parent->fw_list, 1); + } else if (--freeblks->fb_ref == 0) + add_to_worklist(&freeblks->fb_list, 1); + freework->fw_state |= COMPLETE; + if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE) WORKITEM_FREE(freework, D_FREEWORK); - /* - * Don't delay these block frees or it takes an intolerable amount - * of time to process truncates and free their journal entries. - */ - if (freeblks) - add_to_worklist(&freeblks->fb_list, 1); - if (parent) - add_to_worklist(&parent->fw_list, 1); } /* @@ -6326,32 +6966,68 @@ handle_workitem_freeblocks(freeblks, flags) int flags; { struct freework *freework; + struct newblk *newblk; + struct allocindir *aip; + struct ufsmount *ump; struct worklist *wk; KASSERT(LIST_EMPTY(&freeblks->fb_jfreeblkhd), ("handle_workitem_freeblocks: Journal entries not written.")); if (LIST_EMPTY(&freeblks->fb_freeworkhd)) { - handle_complete_freeblocks(freeblks); + handle_complete_freeblocks(freeblks, flags); return; } - freeblks->fb_ref++; + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) { - KASSERT(wk->wk_type == D_FREEWORK, - ("handle_workitem_freeblocks: Unknown type %s", - TYPENAME(wk->wk_type))); - WORKLIST_REMOVE_UNLOCKED(wk); - freework = WK_FREEWORK(wk); - if (freework->fw_lbn <= -NDADDR) - handle_workitem_indirblk(freework); - else - freework_freeblock(freework); + WORKLIST_REMOVE(wk); + switch (wk->wk_type) { + case D_DIRREM: + wk->wk_state |= COMPLETE; + add_to_worklist(wk, 0); + continue; + + case D_ALLOCDIRECT: + free_newblk(WK_NEWBLK(wk)); + continue; + + case D_ALLOCINDIR: + aip = WK_ALLOCINDIR(wk); + freework = NULL; + if (aip->ai_state & DELAYEDFREE) { + FREE_LOCK(&lk); + freework = newfreework(ump, freeblks, NULL, + aip->ai_lbn, aip->ai_newblkno, + ump->um_fs->fs_frag, 0, 0); + ACQUIRE_LOCK(&lk); + } + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk) { + freework->fw_jnewblk = newblk->nb_jnewblk; + newblk->nb_jnewblk->jn_dep = &freework->fw_list; + newblk->nb_jnewblk = NULL; + } + free_newblk(newblk); + continue; + + case D_FREEWORK: + wk->wk_state &= ~ONDEPLIST; + freework = WK_FREEWORK(wk); + if (freework->fw_lbn <= -NDADDR) + handle_workitem_indirblk(freework); + else + freework_freeblock(freework); + continue; + default: + panic("handle_workitem_freeblocks: Unknown type %s", + TYPENAME(wk->wk_type)); + } } - ACQUIRE_LOCK(&lk); if (--freeblks->fb_ref != 0) freeblks = NULL; FREE_LOCK(&lk); if (freeblks) - handle_complete_freeblocks(freeblks); + handle_complete_freeblocks(freeblks, flags); } /* @@ -6360,40 +7036,76 @@ handle_workitem_freeblocks(freeblks, flags) * can not be called until all other dependencies are stable on disk. */ static void -handle_complete_freeblocks(freeblks) +handle_complete_freeblocks(freeblks, flags) struct freeblks *freeblks; + int flags; { + struct inodedep *inodedep; struct inode *ip; struct vnode *vp; struct fs *fs; struct ufsmount *ump; - int flags; + ufs2_daddr_t spare; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; - flags = LK_NOWAIT; + flags = LK_EXCLUSIVE | (flags & LK_NOWAIT); + spare = freeblks->fb_freecnt - freeblks->fb_chkcnt; /* - * If we still have not finished background cleanup, then check - * to see if the block count needs to be adjusted. + * If we did not release the expected number of blocks we may have + * to adjust the inode block count here. Only do so if it wasn't + * a truncation to zero and the modrev still matches. */ - if (freeblks->fb_chkcnt != 0 && (fs->fs_flags & FS_UNCLEAN) != 0 && - ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_previousinum, - (flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ) == 0) { + if (spare && freeblks->fb_len != 0) { + if (ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_inum, + flags, &vp, FFSV_FORCEINSMQ) != 0) { + ACQUIRE_LOCK(&lk); + add_to_worklist(&freeblks->fb_list, 1); + FREE_LOCK(&lk); + return; + } ip = VTOI(vp); - DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + freeblks->fb_chkcnt); - ip->i_flag |= IN_CHANGE; + if (DIP(ip, i_modrev) == freeblks->fb_modrev) { + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - spare); + ip->i_flag |= IN_CHANGE; + /* + * We must wait so this happens before the + * journal is reclaimed. + */ + ffs_update(vp, 1); + } vput(vp); + spare = 0; } + if (freeblks->fb_chkcnt) { + UFS_LOCK(ump); + fs->fs_pendingblocks -= freeblks->fb_chkcnt; + UFS_UNLOCK(ump); + } +#ifdef QUOTA + /* Handle spare. */ +#endif - if (!(freeblks->fb_chkcnt == 0 || +#if 0 + if (!(spare != 0 || ((fs->fs_flags & FS_UNCLEAN) != 0 && (flags & LK_NOWAIT) == 0))) - printf( - "handle_workitem_freeblocks: inode %ju block count %jd\n", - (uintmax_t)freeblks->fb_previousinum, - (intmax_t)freeblks->fb_chkcnt); + printf("handle_complete_freeblocks: " + "inode %ju block count %jd actual %jd\n", + (uintmax_t)freeblks->fb_inum, + (intmax_t)freeblks->fb_chkcnt, + (intmax_t)freeblks->fb_freecnt); +#endif ACQUIRE_LOCK(&lk); + if (freeblks->fb_state & ONDEPLIST) { + inodedep_lookup(freeblks->fb_list.wk_mp, freeblks->fb_inum, + 0, &inodedep); + TAILQ_REMOVE(&inodedep->id_freeblklst, freeblks, fb_next); + freeblks->fb_state &= ~ONDEPLIST; + if (TAILQ_EMPTY(&inodedep->id_freeblklst)) + free_inodedep(inodedep); + } /* * All of the freeblock deps must be complete prior to this call * so it's now safe to complete earlier outstanding journal entries. @@ -6404,10 +7116,15 @@ static void } /* - * Release blocks associated with the inode ip and stored in the indirect + * Release blocks associated with the freeblks and stored in the indirect * block dbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. + * + * This handles partial and complete truncation of blocks. Partial is noted + * with goingaway == 0. In this case the freework is completed after the + * zero'd indirects are written to disk. For full truncation the freework + * is completed after the block is freed. */ static void indir_trunc(freework, dbn, lbn) @@ -6417,165 +7134,105 @@ indir_trunc(freework, dbn, lbn) { struct freework *nfreework; struct workhead wkhd; - struct jnewblk *jnewblkn; - struct jnewblk *jnewblk; struct freeblks *freeblks; struct buf *bp; struct fs *fs; - struct worklist *wkn; - struct worklist *wk; struct indirdep *indirdep; struct ufsmount *ump; ufs1_daddr_t *bap1 = 0; ufs2_daddr_t nb, nnb, *bap2 = 0; - ufs_lbn_t lbnadd; + ufs_lbn_t lbnadd, nlbn; int i, nblocks, ufs1fmt; int fs_pendingblocks; + int goingaway; int freedeps; int needj; int level; int cnt; - LIST_INIT(&wkhd); - level = lbn_level(lbn); - if (level == -1) - panic("indir_trunc: Invalid lbn %jd\n", lbn); freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; - fs_pendingblocks = 0; - freedeps = 0; - needj = UFSTOVFS(ump)->mnt_kern_flag & MNTK_SUJ; - lbnadd = lbn_offset(fs, level); /* - * Get buffer of block pointers to be freed. This routine is not - * called until the zero'ed inode has been written, so it is safe - * to free blocks as they are encountered. Because the inode has - * been zero'ed, calls to bmap on these blocks will fail. So, we - * have to use the on-disk address and the block device for the - * filesystem to look them up. If the file was deleted before its - * indirect blocks were all written to disk, the routine that set - * us up (deallocate_dependencies) will have arranged to leave - * a complete copy of the indirect block in memory for our use. - * Otherwise we have to read the blocks in from the disk. + * Get buffer of block pointers to be freed. There are three cases: + * + * 1) Partial truncate caches the indirdep pointer in the freework + * which provides us a back copy to the save bp which holds the + * pointers we want to clear. When this completes the zero + * pointers are written to the real copy. + * 2) The indirect is being completely truncated, cancel_indirdep() + * eliminated the real copy and placed the indirdep on the saved + * copy. The indirdep and buf are discarded when this completes. + * 3) The indirect was not in memory, we read a copy off of the disk + * using the devvp and drop and invalidate the buffer when we're + * done. */ -#ifdef notyet - bp = getblk(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, 0, 0, - GB_NOCREAT); -#else - bp = incore(&freeblks->fb_devvp->v_bufobj, dbn); -#endif + goingaway = 1; + indirdep = NULL; + if (freework->fw_indir != NULL) { + goingaway = 0; + indirdep = freework->fw_indir; + bp = indirdep->ir_savebp; + if (bp == NULL || bp->b_blkno != dbn) + panic("indir_trunc: Bad saved buf %p blkno %jd", + bp, (intmax_t)dbn); + } else if ((bp = incore(&freeblks->fb_devvp->v_bufobj, dbn)) != NULL) { + /* + * The lock prevents the buf dep list from changing and + * indirects on devvp should only ever have one dependency. + */ + indirdep = WK_INDIRDEP(LIST_FIRST(&bp->b_dep)); + if (indirdep == NULL || (indirdep->ir_state & GOINGAWAY) == 0) + panic("indir_trunc: Bad indirdep %p from buf %p", + indirdep, bp); + } else if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, + NOCRED, &bp) != 0) { + brelse(bp); + return; + } ACQUIRE_LOCK(&lk); - if (bp != NULL && (wk = LIST_FIRST(&bp->b_dep)) != NULL) { - if (wk->wk_type != D_INDIRDEP || - (wk->wk_state & GOINGAWAY) == 0) - panic("indir_trunc: lost indirdep %p", wk); - indirdep = WK_INDIRDEP(wk); - LIST_SWAP(&wkhd, &indirdep->ir_jwork, worklist, wk_list); - LIST_FOREACH_SAFE(jnewblk, &indirdep->ir_jnewblkhd, - jn_indirdeps, jnewblkn) { - /* - * XXX This cancel may cause some lengthy delay - * before the record is reclaimed below. - */ - LIST_REMOVE(jnewblk, jn_indirdeps); - cancel_jnewblk(jnewblk, &wkhd); - } - - free_indirdep(indirdep); - if (!LIST_EMPTY(&bp->b_dep)) - panic("indir_trunc: dangling dep %p", - LIST_FIRST(&bp->b_dep)); - ump->um_numindirdeps -= 1; - FREE_LOCK(&lk); - } else { -#ifdef notyet - if (bp) - brelse(bp); -#endif - FREE_LOCK(&lk); - if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, - NOCRED, &bp) != 0) { - brelse(bp); + /* + * If we have an indirdep we need to enforce the truncation order + * and discard it when it is complete. + */ + if (indirdep) { + if (freework != TAILQ_FIRST(&indirdep->ir_trunc) && + !TAILQ_EMPTY(&indirdep->ir_trunc)) { + add_to_worklist(&freework->fw_list, 0); + FREE_LOCK(&lk); return; } + /* + * If we're goingaway, free the indirdep. Otherwise it will + * linger until the write completes. + */ + if (goingaway) { + free_indirdep(indirdep); + ump->um_numindirdeps -= 1; + } } - /* - * Recursively free indirect blocks. - */ + FREE_LOCK(&lk); + /* Initialize pointers depending on block size. */ if (ump->um_fstype == UFS1) { + bap1 = (ufs1_daddr_t *)bp->b_data; + nb = bap1[freework->fw_off]; ufs1fmt = 1; - bap1 = (ufs1_daddr_t *)bp->b_data; } else { + bap2 = (ufs2_daddr_t *)bp->b_data; + nb = bap2[freework->fw_off]; ufs1fmt = 0; - bap2 = (ufs2_daddr_t *)bp->b_data; } - - /* - * Reclaim indirect blocks which never made it to disk. - */ - cnt = 0; - LIST_FOREACH_SAFE(wk, &wkhd, wk_list, wkn) { - if (wk->wk_type != D_JNEWBLK) - continue; - /* XXX Is the lock necessary here for more than an assert? */ - ACQUIRE_LOCK(&lk); - WORKLIST_REMOVE(wk); - FREE_LOCK(&lk); - jnewblk = WK_JNEWBLK(wk); - if (jnewblk->jn_lbn > 0) - i = (jnewblk->jn_lbn - -lbn) / lbnadd; - else - i = (-(jnewblk->jn_lbn + level - 1) - -(lbn + level)) / - lbnadd; - KASSERT(i >= 0 && i < NINDIR(fs), - ("indir_trunc: Index out of range %d parent %jd lbn %jd level %d", - i, lbn, jnewblk->jn_lbn, level)); - /* Clear the pointer so it isn't found below. */ - if (ufs1fmt) { - nb = bap1[i]; - bap1[i] = 0; - } else { - nb = bap2[i]; - bap2[i] = 0; - } - KASSERT(nb == jnewblk->jn_blkno, - ("indir_trunc: Block mismatch %jd != %jd", - nb, jnewblk->jn_blkno)); - if (level != 0) { - ufs_lbn_t nlbn; - - nlbn = (lbn + 1) - (i * lbnadd); - nfreework = newfreework(ump, freeblks, freework, - nlbn, nb, fs->fs_frag, 0); - nfreework->fw_jnewblk = jnewblk; - freedeps++; - indir_trunc(nfreework, fsbtodb(fs, nb), nlbn); - } else { - struct workhead freewk; - - LIST_INIT(&freewk); - ACQUIRE_LOCK(&lk); - WORKLIST_INSERT(&freewk, wk); - FREE_LOCK(&lk); - ffs_blkfree(ump, fs, freeblks->fb_devvp, - jnewblk->jn_blkno, fs->fs_bsize, - freeblks->fb_previousinum, &freewk); - } - cnt++; - } - ACQUIRE_LOCK(&lk); - /* Any remaining journal work can be completed with freeblks. */ - jwork_move(&freeblks->fb_jwork, &wkhd); - FREE_LOCK(&lk); + level = lbn_level(lbn); + needj = UFSTOVFS(ump)->mnt_kern_flag & MNTK_SUJ; + lbnadd = lbn_offset(fs, level); nblocks = btodb(fs->fs_bsize); - if (ufs1fmt) - nb = bap1[0]; - else - nb = bap2[0]; nfreework = freework; + freedeps = 0; + cnt = 0; /* - * Reclaim on disk blocks. + * Reclaim blocks. Traverses into nested indirect levels and + * arranges for the current level to be freed when subordinates + * are free when journaling. */ for (i = freework->fw_off; i < NINDIR(fs); i++, nb = nnb) { if (i != NINDIR(fs) - 1) { @@ -6589,12 +7246,10 @@ indir_trunc(freework, dbn, lbn) continue; cnt++; if (level != 0) { - ufs_lbn_t nlbn; - nlbn = (lbn + 1) - (i * lbnadd); if (needj != 0) { nfreework = newfreework(ump, freeblks, freework, - nlbn, nb, fs->fs_frag, 0); + nlbn, nb, fs->fs_frag, 0, 0); freedeps++; } indir_trunc(nfreework, fsbtodb(fs, nb), nlbn); @@ -6614,85 +7269,102 @@ indir_trunc(freework, dbn, lbn) freedeps++; } ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, - fs->fs_bsize, freeblks->fb_previousinum, &wkhd); + fs->fs_bsize, freeblks->fb_inum, &wkhd); } } + if (goingaway) { + bp->b_flags |= B_INVAL | B_NOCACHE; + brelse(bp); + } + fs_pendingblocks = 0; if (level == 0) fs_pendingblocks = (nblocks * cnt); /* - * If we're not journaling we can free the indirect now. Otherwise - * setup the ref counts and offset so this indirect can be completed - * when its children are free. + * If we are journaling set up the ref counts and offset so this + * indirect can be completed when its children are free. */ - if (needj == 0) { - fs_pendingblocks += nblocks; - dbn = dbtofsb(fs, dbn); - ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize, - freeblks->fb_previousinum, NULL); + if (needj) { ACQUIRE_LOCK(&lk); - freeblks->fb_chkcnt -= fs_pendingblocks; - if (freework->fw_blkno == dbn) - handle_written_freework(freework); - FREE_LOCK(&lk); - freework = NULL; - } else { - ACQUIRE_LOCK(&lk); freework->fw_off = i; freework->fw_ref += freedeps; freework->fw_ref -= NINDIR(fs) + 1; - if (freework->fw_ref != 0) - freework = NULL; - freeblks->fb_chkcnt -= fs_pendingblocks; + freeblks->fb_freecnt += fs_pendingblocks; + if (freework->fw_ref == 0) + freework_freeblock(freework); FREE_LOCK(&lk); + return; } - if (fs_pendingblocks) { - UFS_LOCK(ump); - fs->fs_pendingblocks -= fs_pendingblocks; - UFS_UNLOCK(ump); + /* + * If we're not journaling we can free the indirect now. + */ + fs_pendingblocks += nblocks; + dbn = dbtofsb(fs, dbn); + ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize, + freeblks->fb_inum, NULL); + /* Non SUJ softdep does single-threaded truncations. */ + freeblks->fb_freecnt += fs_pendingblocks; + if (freework->fw_blkno == dbn) { + ACQUIRE_LOCK(&lk); + handle_written_freework(freework); + FREE_LOCK(&lk); } - bp->b_flags |= B_INVAL | B_NOCACHE; - brelse(bp); - if (freework) - handle_workitem_indirblk(freework); return; } /* - * Cancel an allocindir when it is removed via truncation. + * Cancel an allocindir when it is removed via truncation. When bp is not + * NULL the indirect never appeared on disk and is scheduled to be freed + * independently of the indir so we can more easily track journal work. */ static void -cancel_allocindir(aip, inodedep, freeblks) +cancel_allocindir(aip, bp, freeblks, trunc) struct allocindir *aip; - struct inodedep *inodedep; + struct buf *bp; struct freeblks *freeblks; + int trunc; { - struct jnewblk *jnewblk; + struct indirdep *indirdep; + struct freefrag *freefrag; struct newblk *newblk; + newblk = (struct newblk *)aip; + LIST_REMOVE(aip, ai_next); /* + * We must eliminate the pointer in bp if it must be freed on its + * own due to partial truncate or pending journal work. + */ + if (bp && (trunc || newblk->nb_jnewblk)) { + /* + * Clear the pointer and mark the aip to be freed + * directly if it never existed on disk. + */ + aip->ai_state |= DELAYEDFREE; + indirdep = aip->ai_indirdep; + if (indirdep->ir_state & UFS1FMT) + ((ufs1_daddr_t *)bp->b_data)[aip->ai_offset] = 0; + else + ((ufs2_daddr_t *)bp->b_data)[aip->ai_offset] = 0; + } + /* + * When truncating the previous pointer will be freed via + * savedbp. Eliminate the freefrag which would dup free. + */ + if (trunc && (freefrag = newblk->nb_freefrag) != NULL) { + newblk->nb_freefrag = NULL; + if (freefrag->ff_jdep) + cancel_jfreefrag( + WK_JFREEFRAG(freefrag->ff_jdep)); + jwork_move(&freeblks->fb_jwork, &freefrag->ff_jwork); + WORKITEM_FREE(freefrag, D_FREEFRAG); + } + /* * If the journal hasn't been written the jnewblk must be passed * to the call to ffs_blkfree that reclaims the space. We accomplish - * this by linking the journal dependency into the indirdep to be - * freed when indir_trunc() is called. If the journal has already - * been written we can simply reclaim the journal space when the - * freeblks work is complete. + * this by leaving the journal dependency on the newblk to be freed + * when a freework is created in handle_workitem_freeblocks(). */ - LIST_REMOVE(aip, ai_next); - newblk = (struct newblk *)aip; - if (newblk->nb_jnewblk == NULL) { - if (cancel_newblk(newblk, NULL, &freeblks->fb_jwork)) - panic("cancel_allocindir: Unexpected dependency."); - } else { - jnewblk = cancel_newblk(newblk, &aip->ai_indirdep->ir_list, - &aip->ai_indirdep->ir_jwork); - if (jnewblk) - LIST_INSERT_HEAD(&aip->ai_indirdep->ir_jnewblkhd, - jnewblk, jn_indirdeps); - } - if (inodedep && inodedep->id_state & DEPCOMPLETE) - WORKLIST_INSERT(&inodedep->id_bufwait, &newblk->nb_list); - else - free_newblk(newblk); + cancel_newblk(newblk, NULL, &freeblks->fb_jwork); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &newblk->nb_list); } /* @@ -6788,7 +7460,7 @@ setup_newdir(dap, newinum, dinum, newdirbp, mkdirp WORKITEM_FREE(mkdir2, D_MKDIR); } else { LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); - WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); + WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir2->md_list); } *mkdirp = mkdir2; @@ -7540,6 +8212,7 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); dirrem->dm_pagedep = pagedep; + dirrem->dm_offset = offset; /* * If we're renaming a .. link to a new directory, cancel any * existing MKDIR_PARENT mkdir. If it has already been canceled @@ -8372,7 +9045,7 @@ softdep_disk_io_initiation(bp) freeblks = WK_FREEBLKS(wk); jfreeblk = LIST_FIRST(&freeblks->fb_jfreeblkhd); /* - * We have to wait for the jfreeblks to be journaled + * We have to wait for the freeblks to be journaled * before we can write an inodeblock with updated * pointers. Be careful to arrange the marker so * we revisit the jfreeblk if it's not removed by @@ -8396,7 +9069,7 @@ softdep_disk_io_initiation(bp) */ newblk = WK_NEWBLK(wk); if (newblk->nb_jnewblk != NULL && - indirblk_inseg(newblk->nb_list.wk_mp, + indirblk_lookup(newblk->nb_list.wk_mp, newblk->nb_newblkno)) { LIST_REMOVE(&marker, wk_list); LIST_INSERT_BEFORE(wk, &marker, wk_list); @@ -8900,10 +9573,9 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * list. */ static void -cancel_indirdep(indirdep, bp, inodedep, freeblks) +cancel_indirdep(indirdep, bp, freeblks) struct indirdep *indirdep; struct buf *bp; - struct inodedep *inodedep; struct freeblks *freeblks; { struct allocindir *aip; @@ -8924,24 +9596,38 @@ static void */ if (indirdep->ir_state & GOINGAWAY) panic("cancel_indirdep: already gone"); - if (indirdep->ir_state & ONDEPLIST) { - indirdep->ir_state &= ~ONDEPLIST; + if ((indirdep->ir_state & DEPCOMPLETE) == 0) { + indirdep->ir_state |= DEPCOMPLETE; LIST_REMOVE(indirdep, ir_next); } indirdep->ir_state |= GOINGAWAY; VFSTOUFS(indirdep->ir_list.wk_mp)->um_numindirdeps += 1; + /* + * Pass in bp for blocks still have journal writes + * pending so we can cancel them on their own. + */ while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0) - cancel_allocindir(aip, inodedep, freeblks); + cancel_allocindir(aip, bp, freeblks, 0); while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) - cancel_allocindir(aip, inodedep, freeblks); + cancel_allocindir(aip, NULL, freeblks, 0); while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0) - cancel_allocindir(aip, inodedep, freeblks); + cancel_allocindir(aip, NULL, freeblks, 0); while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != 0) - cancel_allocindir(aip, inodedep, freeblks); - bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); + cancel_allocindir(aip, NULL, freeblks, 0); + /* + * If there are pending partial truncations we need to keep the + * old block copy around until they complete. This is because + * the current b_data is not a perfect superset of the available + * blocks. + */ + if (TAILQ_EMPTY(&indirdep->ir_trunc)) + bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); + else + bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); WORKLIST_REMOVE(&indirdep->ir_list); WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, &indirdep->ir_list); - indirdep->ir_savebp = NULL; + indirdep->ir_bp = NULL; + indirdep->ir_freeblks = freeblks; } /* @@ -8952,10 +9638,8 @@ free_indirdep(indirdep) struct indirdep *indirdep; { - KASSERT(LIST_EMPTY(&indirdep->ir_jwork), - ("free_indirdep: Journal work not empty.")); - KASSERT(LIST_EMPTY(&indirdep->ir_jnewblkhd), - ("free_indirdep: Journal new block list not empty.")); + KASSERT(TAILQ_EMPTY(&indirdep->ir_trunc), + ("free_indirdep: Indir trunc list not empty.")); KASSERT(LIST_EMPTY(&indirdep->ir_completehd), ("free_indirdep: Complete head not empty.")); KASSERT(LIST_EMPTY(&indirdep->ir_writehd), @@ -8964,10 +9648,10 @@ free_indirdep(indirdep) ("free_indirdep: done head not empty.")); KASSERT(LIST_EMPTY(&indirdep->ir_deplisthd), ("free_indirdep: deplist head not empty.")); - KASSERT(indirdep->ir_savebp == NULL, - ("free_indirdep: %p ir_savebp != NULL", indirdep)); - KASSERT((indirdep->ir_state & ONDEPLIST) == 0, - ("free_indirdep: %p still on deplist.", indirdep)); + KASSERT((indirdep->ir_state & DEPCOMPLETE), + ("free_indirdep: %p still on newblk list.", indirdep)); + KASSERT(indirdep->ir_saveddata == NULL, + ("free_indirdep: %p still has saved data.", indirdep)); if (indirdep->ir_state & ONWORKLIST) WORKLIST_REMOVE(&indirdep->ir_list); WORKITEM_FREE(indirdep, D_INDIRDEP); @@ -8984,22 +9668,25 @@ initiate_write_indirdep(indirdep, bp) struct buf *bp; { + indirdep->ir_state |= IOSTARTED; if (indirdep->ir_state & GOINGAWAY) panic("disk_io_initiation: indirdep gone"); - /* * If there are no remaining dependencies, this will be writing * the real pointers. */ - if (LIST_EMPTY(&indirdep->ir_deplisthd)) + if (LIST_EMPTY(&indirdep->ir_deplisthd) && + TAILQ_EMPTY(&indirdep->ir_trunc)) return; /* * Replace up-to-date version with safe version. */ - FREE_LOCK(&lk); - indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, - M_SOFTDEP_FLAGS); - ACQUIRE_LOCK(&lk); + if (indirdep->ir_saveddata == NULL) { + FREE_LOCK(&lk); + indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, + M_SOFTDEP_FLAGS); + ACQUIRE_LOCK(&lk); + } indirdep->ir_state &= ~ATTACHED; indirdep->ir_state |= UNDONE; bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); @@ -9066,11 +9753,11 @@ softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) int frags; struct workhead *wkhd; { + struct bmsafemap *bmsafemap; struct jnewblk *jnewblk; - struct worklist *wk, *wkn; + struct worklist *wk; + struct fs *fs; #ifdef SUJ_DEBUG - struct bmsafemap *bmsafemap; - struct fs *fs; uint8_t *blksfree; struct cg *cgp; ufs2_daddr_t jstart; @@ -9081,25 +9768,29 @@ softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) #endif ACQUIRE_LOCK(&lk); + /* Lookup the bmsafemap so we track when it is dirty. */ + fs = VFSTOUFS(mp)->um_fs; + bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno)); /* * Detach any jnewblks which have been canceled. They must linger * until the bitmap is cleared again by ffs_blkfree() to prevent * an unjournaled allocation from hitting the disk. */ if (wkhd) { - LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) { - if (wk->wk_type != D_JNEWBLK) + while ((wk = LIST_FIRST(wkhd)) != NULL) { + WORKLIST_REMOVE(wk); + if (wk->wk_type != D_JNEWBLK) { + WORKLIST_INSERT(&bmsafemap->sm_freehd, wk); continue; + } jnewblk = WK_JNEWBLK(wk); KASSERT(jnewblk->jn_state & GOINGAWAY, ("softdep_setup_blkfree: jnewblk not canceled.")); - WORKLIST_REMOVE(wk); #ifdef SUJ_DEBUG /* * Assert that this block is free in the bitmap * before we discard the jnewblk. */ - fs = VFSTOUFS(mp)->um_fs; cgp = (struct cg *)bp->b_data; blksfree = cg_blksfree(cgp); bno = dtogd(fs, jnewblk->jn_blkno); @@ -9117,12 +9808,6 @@ softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) wk->wk_state |= COMPLETE | ATTACHED; free_jnewblk(jnewblk); } - /* - * The buf must be locked by the caller otherwise these could - * be added while it's being written and the write would - * complete them before they made it to disk. - */ - jwork_move(&bp->b_dep, wkhd); } #ifdef SUJ_DEBUG @@ -9242,6 +9927,8 @@ initiate_write_bmsafemap(bmsafemap, bp) inodedep, id_deps); LIST_SWAP(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr, newblk, nb_deps); + LIST_SWAP(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, worklist, + wk_list); } /* @@ -9260,6 +9947,7 @@ softdep_disk_write_complete(bp) struct worklist *wk; struct worklist *owk; struct workhead reattach; + struct freeblks *freeblks; struct buf *sbp; /* @@ -9277,6 +9965,7 @@ softdep_disk_write_complete(bp) ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { WORKLIST_REMOVE(wk); + dep_write[wk->wk_type]++; if (wk == owk) panic("duplicate worklist: %p\n", wk); owk = wk; @@ -9318,7 +10007,9 @@ softdep_disk_write_complete(bp) case D_FREEBLKS: wk->wk_state |= COMPLETE; - if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE) + freeblks = WK_FREEBLKS(wk); + if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE && + LIST_EMPTY(&freeblks->fb_jfreeblkhd)) add_to_worklist(wk, 1); continue; @@ -9326,10 +10017,6 @@ softdep_disk_write_complete(bp) handle_written_freework(WK_FREEWORK(wk)); break; - case D_FREEDEP: - free_freedep(WK_FREEDEP(wk)); - continue; - case D_JSEGDEP: free_jsegdep(WK_JSEGDEP(wk)); continue; @@ -9459,7 +10146,11 @@ handle_allocindir_partdone(aip) return; indirdep = aip->ai_indirdep; LIST_REMOVE(aip, ai_next); - if (indirdep->ir_state & UNDONE) { + /* + * Don't set a pointer while the buffer is undergoing IO or while + * we have active truncations. + */ + if (indirdep->ir_state & UNDONE || !TAILQ_EMPTY(&indirdep->ir_trunc)) { LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next); return; } @@ -9490,6 +10181,12 @@ handle_jwork(wkhd) case D_JSEGDEP: free_jsegdep(WK_JSEGDEP(wk)); continue; + case D_FREEDEP: + free_freedep(WK_FREEDEP(wk)); + continue; + case D_FREEWORK: + handle_written_freework(WK_FREEWORK(wk)); + continue; default: panic("handle_jwork: Unknown type %s\n", TYPENAME(wk->wk_type)); @@ -9852,21 +10549,26 @@ handle_written_indirdep(indirdep, bp, bpp) struct buf **bpp; { struct allocindir *aip; + struct buf *sbp; int chgs; if (indirdep->ir_state & GOINGAWAY) - panic("disk_write_complete: indirdep gone"); + panic("handle_written_indirdep: indirdep gone"); + if ((indirdep->ir_state & IOSTARTED) == 0) + panic("handle_written_indirdep: IO not started"); chgs = 0; /* * If there were rollbacks revert them here. */ if (indirdep->ir_saveddata) { bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount); - free(indirdep->ir_saveddata, M_INDIRDEP); - indirdep->ir_saveddata = 0; + if (TAILQ_EMPTY(&indirdep->ir_trunc)) { + free(indirdep->ir_saveddata, M_INDIRDEP); + indirdep->ir_saveddata = NULL; + } chgs = 1; } - indirdep->ir_state &= ~UNDONE; + indirdep->ir_state &= ~(UNDONE | IOSTARTED); indirdep->ir_state |= ATTACHED; /* * Move allocindirs with written pointers to the completehd if @@ -9878,6 +10580,7 @@ handle_written_indirdep(indirdep, bp, bpp) if ((indirdep->ir_state & DEPCOMPLETE) == 0) { LIST_INSERT_HEAD(&indirdep->ir_completehd, aip, ai_next); + newblk_freefrag(&aip->ai_block); continue; } free_newblk(&aip->ai_block); @@ -9886,50 +10589,42 @@ handle_written_indirdep(indirdep, bp, bpp) * Move allocindirs that have finished dependency processing from * the done list to the write list after updating the pointers. */ - while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { - handle_allocindir_partdone(aip); - if (aip == LIST_FIRST(&indirdep->ir_donehd)) - panic("disk_write_complete: not gone"); - chgs = 1; + if (TAILQ_EMPTY(&indirdep->ir_trunc)) { + while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { + handle_allocindir_partdone(aip); + if (aip == LIST_FIRST(&indirdep->ir_donehd)) + panic("disk_write_complete: not gone"); + chgs = 1; + } } /* - * If this indirdep has been detached from its newblk during - * I/O we need to keep this dep attached to the buffer so - * deallocate_dependencies can find it and properly resolve - * any outstanding dependencies. + * Preserve the indirdep if there were any changes or if it is not + * yet valid on disk. */ - if ((indirdep->ir_state & (ONDEPLIST | DEPCOMPLETE)) == 0) - chgs = 1; - if ((bp->b_flags & B_DELWRI) == 0) + if (chgs) { stat_indir_blk_ptrs++; + bdirty(bp); + return (1); + } /* * If there were no changes we can discard the savedbp and detach * ourselves from the buf. We are only carrying completed pointers * in this case. */ - if (chgs == 0) { - struct buf *sbp; - - sbp = indirdep->ir_savebp; - sbp->b_flags |= B_INVAL | B_NOCACHE; - indirdep->ir_savebp = NULL; - if (*bpp != NULL) - panic("handle_written_indirdep: bp already exists."); - *bpp = sbp; - } else - bdirty(bp); + sbp = indirdep->ir_savebp; + sbp->b_flags |= B_INVAL | B_NOCACHE; + indirdep->ir_savebp = NULL; + indirdep->ir_bp = NULL; + if (*bpp != NULL) + panic("handle_written_indirdep: bp already exists."); + *bpp = sbp; /* - * If there are no fresh dependencies and none waiting on writes - * we can free the indirdep. + * The indirdep may not be freed until its parent points at it. */ - if ((indirdep->ir_state & DEPCOMPLETE) && chgs == 0) { - if (indirdep->ir_state & ONDEPLIST) - LIST_REMOVE(indirdep, ir_next); + if (indirdep->ir_state & DEPCOMPLETE) free_indirdep(indirdep); - return (0); - } - return (chgs); + return (0); } /* @@ -9974,6 +10669,7 @@ handle_written_bmsafemap(bmsafemap, bp) struct inodedep *inodedep; struct jaddref *jaddref, *jatmp; struct jnewblk *jnewblk, *jntmp; + struct ufsmount *ump; uint8_t *inosused; uint8_t *blksfree; struct cg *cgp; @@ -9985,9 +10681,15 @@ handle_written_bmsafemap(bmsafemap, bp) if ((bmsafemap->sm_state & IOSTARTED) == 0) panic("initiate_write_bmsafemap: Not started\n"); + ump = VFSTOUFS(bmsafemap->sm_list.wk_mp); chgs = 0; bmsafemap->sm_state &= ~IOSTARTED; /* + * Release journal work that was waiting on the write. + */ + handle_jwork(&bmsafemap->sm_freewr); + + /* * Restore unwritten inode allocation pending jaddref writes. */ if (!LIST_EMPTY(&bmsafemap->sm_jaddrefhd)) { @@ -10063,16 +10765,17 @@ handle_written_bmsafemap(bmsafemap, bp) LIST_REMOVE(inodedep, id_deps); inodedep->id_bmsafemap = NULL; } - if (LIST_EMPTY(&bmsafemap->sm_jaddrefhd) && + LIST_REMOVE(bmsafemap, sm_next); + if (chgs == 0 && LIST_EMPTY(&bmsafemap->sm_jaddrefhd) && LIST_EMPTY(&bmsafemap->sm_jnewblkhd) && LIST_EMPTY(&bmsafemap->sm_newblkhd) && - LIST_EMPTY(&bmsafemap->sm_inodedephd)) { - if (chgs) - bdirty(bp); + LIST_EMPTY(&bmsafemap->sm_inodedephd) && + LIST_EMPTY(&bmsafemap->sm_freehd)) { LIST_REMOVE(bmsafemap, sm_hash); WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); return (0); } + LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next); bdirty(bp); return (1); } @@ -10119,17 +10822,19 @@ free_pagedep(pagedep) { int i; - if (pagedep->pd_state & (NEWBLOCK | ONWORKLIST)) + if (pagedep->pd_state & NEWBLOCK) return; + if (!LIST_EMPTY(&pagedep->pd_dirremhd)) + return; for (i = 0; i < DAHASHSZ; i++) if (!LIST_EMPTY(&pagedep->pd_diraddhd[i])) return; + if (!LIST_EMPTY(&pagedep->pd_pendinghd)) + return; if (!LIST_EMPTY(&pagedep->pd_jmvrefhd)) return; - if (!LIST_EMPTY(&pagedep->pd_dirremhd)) - return; - if (!LIST_EMPTY(&pagedep->pd_pendinghd)) - return; + if (pagedep->pd_state & ONWORKLIST) + WORKLIST_REMOVE(&pagedep->pd_list); LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); } @@ -10217,11 +10922,7 @@ handle_written_filepage(pagedep, bp) * Otherwise it will remain to track any new entries on * the page in case they are fsync'ed. */ - if ((pagedep->pd_state & NEWBLOCK) == 0 && - LIST_EMPTY(&pagedep->pd_jmvrefhd)) { - LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); - } + free_pagedep(pagedep); return (0); } @@ -10655,6 +11356,44 @@ restart: BO_UNLOCK(bo); } +static void +sync_cgs(mp, waitfor) + struct mount *mp; + int waitfor; +{ + struct bmsafemap *bmsafemap; + struct bmsafemap *sentinal; + struct ufsmount *ump; + struct buf *bp; + + sentinal = malloc(sizeof(*sentinal), M_BMSAFEMAP, M_ZERO | M_WAITOK); + sentinal->sm_cg = -1; + ump = VFSTOUFS(mp); + ACQUIRE_LOCK(&lk); + LIST_INSERT_HEAD(&ump->softdep_dirtycg, sentinal, sm_next); + for (bmsafemap = LIST_NEXT(sentinal, sm_next); bmsafemap != NULL; + bmsafemap = LIST_NEXT(sentinal, sm_next)) { + /* Skip sentinals and cgs with no work to release. */ + if (bmsafemap->sm_cg == -1 || + LIST_EMPTY(&bmsafemap->sm_freehd)) { + LIST_REMOVE(sentinal, sm_next); + LIST_INSERT_AFTER(bmsafemap, sentinal, sm_next); + continue; + } + bp = getdirtybuf(bmsafemap->sm_buf, &lk, waitfor); + if (bp == NULL) + continue; + LIST_REMOVE(sentinal, sm_next); + LIST_INSERT_AFTER(bmsafemap, sentinal, sm_next); + FREE_LOCK(&lk); + bwrite(bp); + ACQUIRE_LOCK(&lk); + } + LIST_REMOVE(sentinal, sm_next); + FREE_LOCK(&lk); + free(sentinal, M_BMSAFEMAP); +} + /* * This routine is called when we are trying to synchronously flush a * file. This routine must eliminate any filesystem metadata dependencies @@ -10664,6 +11403,8 @@ restart: int softdep_sync_metadata(struct vnode *vp) { + struct indirdep *indirdep; + struct freework *freework; struct pagedep *pagedep; struct allocindir *aip; struct newblk *newblk; @@ -10678,7 +11419,8 @@ softdep_sync_metadata(struct vnode *vp) * Ensure that any direct block dependencies have been cleared. */ ACQUIRE_LOCK(&lk); - if ((error = flush_inodedep_deps(vp->v_mount, VTOI(vp)->i_number))) { + if ((error = flush_inodedep_deps(vp, vp->v_mount, + VTOI(vp)->i_number))) { FREE_LOCK(&lk); return (error); } @@ -10754,9 +11496,45 @@ loop: case D_INDIRDEP: restart: + indirdep = WK_INDIRDEP(wk); + TAILQ_FOREACH(freework, &indirdep->ir_trunc, fw_next) { + /* + * If the freework is on a worklist it is + * simply waiting for softdep flush to + * execute it. + */ + if (freework->fw_state & ONWORKLIST) { + if (freework->fw_state & ONDEPLIST) + WORKLIST_REMOVE( + &freework->fw_list); + else + remove_from_worklist( + &freework->fw_list); + freework->fw_state &= ~ONDEPLIST; + } else + freework = NULL; + /* + * Release the buf so that freework_freeblock + * has a chance to succeed. + */ + FREE_LOCK(&lk); + BUF_NOREC(bp); + bqrelse(bp); + if (freework) { + ACQUIRE_LOCK(&lk); + handle_workitem_indirblk(freework); + FREE_LOCK(&lk); + goto top; + } + /* + * We may need to flush cgs for the freework + * to finish. + */ + sync_cgs(vp->v_mount, MNT_WAIT); + goto top; + } - LIST_FOREACH(aip, - &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) { + LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) { newblk = (struct newblk *)aip; if (newblk->nb_jnewblk != NULL) { stat_jwait_newblk++; @@ -10799,6 +11577,9 @@ loop: } continue; + case D_FREEWORK: + continue; + default: panic("softdep_sync_metadata: Unknown type %s", TYPENAME(wk->wk_type)); @@ -10851,11 +11632,58 @@ loop: } /* + * Make sure that all truncations have been started so that if we + * discover any lingering dependencies on indirdeps we can successfully + * flush them. + */ +static int +flush_trunclist(vp, inodedep, waitfor, errorp) + struct vnode *vp; + struct inodedep *inodedep; + int waitfor; + int *errorp; +{ + struct freeblks *freeblks; + + TAILQ_FOREACH(freeblks, &inodedep->id_freeblklst, fb_next) { + if (!LIST_EMPTY(&freeblks->fb_jfreeblkhd)) { + stat_jwait_freeblks++; + jwait(&LIST_FIRST(&freeblks->fb_jfreeblkhd)->jf_list); + return (1); + } + /* Freeblks is waiting on a inode write. */ + if ((freeblks->fb_state & COMPLETE) == 0) { + FREE_LOCK(&lk); + ffs_update(vp, 1); + goto restart; + } + /* + * If the truncation is complete it may be waiting for the vnode + * lock to update i_blocks. It may also simply be waiting on + * a busy softdep thread to start truncation. + */ + if ((freeblks->fb_state & (ONWORKLIST | ALLCOMPLETE)) == + (ONWORKLIST | ALLCOMPLETE)) { + remove_from_worklist(&freeblks->fb_list); + FREE_LOCK(&lk); + handle_workitem_freeblocks(freeblks, 0); + goto restart; + } + } + return (0); + +restart: + ACQUIRE_LOCK(&lk); + return (1); +} + +/* * Flush the dependencies associated with an inodedep. * Called with splbio blocked. */ static int -flush_inodedep_deps(mp, ino) +flush_inodedep_deps(vp, mp, ino) + struct vnode *vp; struct mount *mp; ino_t ino; { @@ -10895,7 +11723,8 @@ restart: if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) || flush_deplist(&inodedep->id_newinoupdt, waitfor, &error) || flush_deplist(&inodedep->id_extupdt, waitfor, &error) || - flush_deplist(&inodedep->id_newextupdt, waitfor, &error)) + flush_deplist(&inodedep->id_newextupdt, waitfor, &error) || + flush_trunclist(vp, inodedep, waitfor, &error)) continue; /* * If pass2, we are done, otherwise do pass 2. @@ -11237,6 +12066,8 @@ softdep_slowdown(vp) softdep_speedup(); stat_sync_limit_hit += 1; FREE_LOCK(&lk); + if (DOINGSUJ(vp)) + return (0); return (1); } @@ -11338,6 +12169,7 @@ retry: UFS_UNLOCK(ump); ACQUIRE_LOCK(&lk); process_removes(vp); + process_truncates(vp); if (ump->softdep_on_worklist > 0 && process_worklist_item(UFSTOVFS(ump), LK_NOWAIT) != -1) { stat_worklist_push += 1; @@ -11363,24 +12195,14 @@ retry: UFS_UNLOCK(ump); MNT_ILOCK(mp); MNT_VNODE_FOREACH(lvp, mp, mvp) { - UFS_LOCK(ump); - if (ump->softdep_on_worklist > 0) { - UFS_UNLOCK(ump); - MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); - MNT_IUNLOCK(mp); - UFS_LOCK(ump); - stat_cleanup_retries += 1; - goto retry; - } - UFS_UNLOCK(ump); VI_LOCK(lvp); - if (TAILQ_FIRST(&lvp->v_bufobj.bo_dirty.bv_hd) == 0 || - VOP_ISLOCKED(lvp) != 0) { + if (TAILQ_FIRST(&lvp->v_bufobj.bo_dirty.bv_hd) == 0) { VI_UNLOCK(lvp); continue; } MNT_IUNLOCK(mp); - if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK, curthread)) { + if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT, + curthread)) { MNT_ILOCK(mp); continue; } @@ -11394,8 +12216,17 @@ retry: MNT_ILOCK(mp); } MNT_IUNLOCK(mp); + lvp = ump->um_devvp; + if (vn_lock(lvp, LK_EXCLUSIVE | LK_NOWAIT) == 0) { + VOP_FSYNC(lvp, MNT_NOWAIT, curthread); + VOP_UNLOCK(lvp, 0); + } + UFS_LOCK(ump); + if (ump->softdep_on_worklist > 0) { + stat_cleanup_retries += 1; + goto retry; + } stat_cleanup_failures += 1; - UFS_LOCK(ump); } if (time_second - starttime > stat_cleanup_high_delay) stat_cleanup_high_delay = time_second - starttime; @@ -11664,6 +12495,7 @@ softdep_count_dependencies(bp, wantcount) { struct worklist *wk; struct bmsafemap *bmsafemap; + struct freework *freework; struct inodedep *inodedep; struct indirdep *indirdep; struct freeblks *freeblks; @@ -11711,6 +12543,13 @@ softdep_count_dependencies(bp, wantcount) case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); + TAILQ_FOREACH(freework, &indirdep->ir_trunc, fw_next) { + /* indirect truncation dependency */ + retval += 1; + if (!wantcount) + goto out; + } + LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) { /* indirect block pointer dependency */ retval += 1; Index: /usr/src/sys/ufs/ffs/ffs_alloc.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_alloc.c (revision 221878) +++ /usr/src/sys/ufs/ffs/ffs_alloc.c (working copy) @@ -217,7 +217,7 @@ nospace: (void) chkdq(ip, -btodb(size), cred, FORCE); UFS_LOCK(ump); #endif - if (reclaimed == 0) { + if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) { reclaimed = 1; softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT); goto retry; @@ -418,15 +418,15 @@ nospace: /* * no space available */ - if (reclaimed == 0) { + if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) { reclaimed = 1; - softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); if (bp) { brelse(bp); bp = NULL; } UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); goto retry; } UFS_UNLOCK(ump); Index: /usr/src/sys/ufs/ffs/ffs_extern.h =================================================================== --- /usr/src/sys/ufs/ffs/ffs_extern.h (revision 221878) +++ /usr/src/sys/ufs/ffs/ffs_extern.h (working copy) @@ -107,7 +107,6 @@ extern struct vop_vector ffs_fifoops2; int softdep_check_suspend(struct mount *, struct vnode *, int, int, int, int); -int softdep_complete_trunc(struct vnode *, void *); void softdep_get_depcounts(struct mount *, int *, int *); void softdep_initialize(void); void softdep_uninitialize(void); @@ -139,7 +138,6 @@ void softdep_setup_blkfree(struct mount *, struct void softdep_setup_inofree(struct mount *, struct buf *, ino_t, struct workhead *); void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *); -void *softdep_setup_trunc(struct vnode *vp, off_t length, int flags); void softdep_fsync_mountdev(struct vnode *); int softdep_sync_metadata(struct vnode *); int softdep_process_worklist(struct mount *, int); @@ -147,7 +145,9 @@ int softdep_fsync(struct vnode *); int softdep_waitidle(struct mount *); int softdep_prealloc(struct vnode *, int); int softdep_journal_lookup(struct mount *, struct vnode **); +void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int); + /* * Things to request flushing in softdep_request_cleanup() */ Index: /usr/src/sys/ufs/ffs/softdep.h =================================================================== --- /usr/src/sys/ufs/ffs/softdep.h (revision 221878) +++ /usr/src/sys/ufs/ffs/softdep.h (working copy) @@ -127,7 +127,7 @@ #define DIRCHG 0x000080 /* diradd, dirrem only */ #define GOINGAWAY 0x000100 /* indirdep, jremref only */ #define IOSTARTED 0x000200 /* inodedep, pagedep, bmsafemap only */ -#define UNUSED400 0x000400 /* currently available. */ +#define DELAYEDFREE 0x000400 /* allocindirect free delayed. */ #define NEWBLOCK 0x000800 /* pagedep, jaddref only */ #define INPROGRESS 0x001000 /* dirrem, freeblks, freefrag, freefile only */ #define UFS1FMT 0x002000 /* indirdep only */ @@ -215,8 +215,10 @@ LIST_HEAD(jmvrefhd, jmvref); LIST_HEAD(jnewblkhd, jnewblk); LIST_HEAD(jfreeblkhd, jfreeblk); LIST_HEAD(freeworkhd, freework); +TAILQ_HEAD(freeworklst, freework); TAILQ_HEAD(jseglst, jseg); TAILQ_HEAD(inoreflst, inoref); +TAILQ_HEAD(freeblklst, freeblks); /* * The "pagedep" structure tracks the various dependencies related to @@ -321,6 +323,7 @@ struct inodedep { struct allocdirectlst id_newinoupdt; /* updates when inode written */ struct allocdirectlst id_extupdt; /* extdata updates pre-inode write */ struct allocdirectlst id_newextupdt; /* extdata updates at ino write */ + struct freeblklst id_freeblklst; /* List of partial truncates. */ union { struct ufs1_dinode *idu_savedino1; /* saved ufs1_dinode contents */ struct ufs2_dinode *idu_savedino2; /* saved ufs2_dinode contents */ @@ -342,8 +345,9 @@ struct inodedep { struct bmsafemap { struct worklist sm_list; /* cylgrp buffer */ # define sm_state sm_list.wk_state + LIST_ENTRY(bmsafemap) sm_hash; /* Hash links. */ + LIST_ENTRY(bmsafemap) sm_next; /* Mount list. */ int sm_cg; - LIST_ENTRY(bmsafemap) sm_hash; /* Hash links. */ struct buf *sm_buf; /* associated buffer */ struct allocdirecthd sm_allocdirecthd; /* allocdirect deps */ struct allocdirecthd sm_allocdirectwr; /* writing allocdirect deps */ @@ -355,6 +359,8 @@ struct bmsafemap { struct newblkhd sm_newblkwr; /* writing newblk deps */ struct jaddrefhd sm_jaddrefhd; /* Pending inode allocations. */ struct jnewblkhd sm_jnewblkhd; /* Pending block allocations. */ + struct workhead sm_freehd; /* Freedep deps. */ + struct workhead sm_freewr; /* Written freedeps. */ }; /* @@ -442,14 +448,15 @@ struct indirdep { struct worklist ir_list; /* buffer holding indirect block */ # define ir_state ir_list.wk_state /* indirect block pointer state */ LIST_ENTRY(indirdep) ir_next; /* alloc{direct,indir} list */ + TAILQ_HEAD(, freework) ir_trunc; /* List of truncations. */ caddr_t ir_saveddata; /* buffer cache contents */ struct buf *ir_savebp; /* buffer holding safe copy */ + struct buf *ir_bp; /* buffer holding live copy */ struct allocindirhd ir_completehd; /* waiting for indirdep complete */ struct allocindirhd ir_writehd; /* Waiting for the pointer write. */ struct allocindirhd ir_donehd; /* done waiting to update safecopy */ struct allocindirhd ir_deplisthd; /* allocindir deps for this block */ - struct jnewblkhd ir_jnewblkhd; /* Canceled block allocations. */ - struct workhead ir_jwork; /* Journal work pending. */ + struct freeblks *ir_freeblks; /* Freeblks that frees this indir. */ }; /* @@ -471,6 +478,7 @@ struct allocindir { LIST_ENTRY(allocindir) ai_next; /* indirdep's list of allocindir's */ struct indirdep *ai_indirdep; /* address of associated indirdep */ ufs2_daddr_t ai_oldblkno; /* old value of block pointer */ + ufs_lbn_t ai_lbn; /* Logical block number. */ int ai_offset; /* Pointer offset in parent. */ }; #define ai_newblkno ai_block.nb_newblkno @@ -516,13 +524,18 @@ struct freefrag { struct freeblks { struct worklist fb_list; /* id_inowait or delayed worklist */ # define fb_state fb_list.wk_state /* inode and dirty block state */ + TAILQ_ENTRY(freeblks) fb_next; /* List of inode truncates. */ struct jfreeblkhd fb_jfreeblkhd; /* Journal entries pending */ struct workhead fb_freeworkhd; /* Work items pending */ struct workhead fb_jwork; /* Journal work pending */ - ino_t fb_previousinum; /* inode of previous owner of blocks */ + struct jtrunc *fb_jtrunc; /* Journal truncation record. */ + struct vnode *fb_devvp; /* filesystem device vnode */ + uint64_t fb_modrev; /* Inode revision at start of trunc. */ + off_t fb_len; /* Length we're truncating to. */ + ufs2_daddr_t fb_chkcnt; /* Expected blks released. */ + ufs2_daddr_t fb_freecnt; /* Actual blocks released. */ + ino_t fb_inum; /* inode owner of blocks */ uid_t fb_uid; /* uid of previous owner of blocks */ - struct vnode *fb_devvp; /* filesystem device vnode */ - ufs2_daddr_t fb_chkcnt; /* used to check cnt of blks released */ int fb_ref; /* Children outstanding. */ }; @@ -538,16 +551,18 @@ struct freeblks { struct freework { struct worklist fw_list; /* Delayed worklist. */ # define fw_state fw_list.wk_state - LIST_ENTRY(freework) fw_next; /* For seg journal list. */ - struct jnewblk *fw_jnewblk; /* Journal entry to cancel. */ + LIST_ENTRY(freework) fw_segs; /* Seg list. */ + TAILQ_ENTRY(freework) fw_next; /* Hash/Trunc list. */ + struct jnewblk *fw_jnewblk; /* Journal entry to cancel. */ struct freeblks *fw_freeblks; /* Root of operation. */ struct freework *fw_parent; /* Parent indirect. */ + struct indirdep *fw_indir; /* indirect block. */ ufs2_daddr_t fw_blkno; /* Our block #. */ ufs_lbn_t fw_lbn; /* Original lbn before free. */ - int fw_frags; /* Number of frags. */ - int fw_ref; /* Number of children out. */ - int fw_off; /* Current working position. */ - struct workhead fw_jwork; /* Journal work pending. */ + uint16_t fw_frags; /* Number of frags. */ + uint16_t fw_ref; /* Number of children out. */ + uint16_t fw_off; /* Current working position. */ + uint16_t fw_start; /* Start of partial truncate. */ }; /* @@ -674,6 +689,7 @@ struct dirrem { LIST_ENTRY(dirrem) dm_inonext; /* inodedep's list of dirrem's */ struct jremrefhd dm_jremrefhd; /* Pending remove reference deps. */ ino_t dm_oldinum; /* inum of the removed dir entry */ + doff_t dm_offset; /* offset of removed dir entry in blk */ union { struct pagedep *dmu_pagedep; /* pagedep dependency for remove */ ino_t dmu_dirinum; /* parent inode number (for rmdir) */ @@ -707,7 +723,7 @@ struct dirrem { */ struct newdirblk { struct worklist db_list; /* id_inowait or pg_newdirblk */ -# define db_state db_list.wk_state /* unused */ +# define db_state db_list.wk_state struct pagedep *db_pagedep; /* associated pagedep */ struct workhead db_mkdir; }; @@ -807,7 +823,6 @@ struct jnewblk { # define jn_state jn_list.wk_state struct jsegdep *jn_jsegdep; /* Will track our journal record. */ LIST_ENTRY(jnewblk) jn_deps; /* Jnewblks on sm_jnewblkhd. */ - LIST_ENTRY(jnewblk) jn_indirdeps; /* Jnewblks on ir_jnewblkhd. */ struct worklist *jn_dep; /* Dependency to ref completed seg. */ ino_t jn_ino; /* Ino to which allocated. */ ufs_lbn_t jn_lbn; /* Lbn to which allocated. */ @@ -831,6 +846,7 @@ struct jfreeblk { ufs_lbn_t jf_lbn; /* Lbn from which blocks freed. */ ufs2_daddr_t jf_blkno; /* Blkno being freed. */ int jf_frags; /* Number of frags being freed. */ + int jf_off; /* Offset into indir. */ }; /* @@ -850,14 +866,12 @@ struct jfreefrag { }; /* - * A "jtrunc" journals the intent to truncate an inode to a non-zero - * value. This is done synchronously prior to the synchronous partial - * truncation process. The jsegdep is not released until the truncation - * is complete and the truncated inode is fsync'd. + * A "jtrunc" journals the intent to truncate an inode's data or extent area. */ struct jtrunc { struct worklist jt_list; /* Linked to softdep_journal_pending. */ struct jsegdep *jt_jsegdep; /* Will track our journal record. */ + struct freeblks *jt_freeblks; /* The parent freeblks operation. */ ino_t jt_ino; /* Ino being truncated. */ off_t jt_size; /* Final file size. */ int jt_extsize; /* Final extent size. */ Index: /usr/src/sys/ufs/ffs/ffs_balloc.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_balloc.c (revision 221878) +++ /usr/src/sys/ufs/ffs/ffs_balloc.c (working copy) @@ -105,6 +105,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; int unwindidx = -1; int saved_inbdflush; + int reclaimed; ip = VTOI(vp); dp = ip->i_din1; @@ -112,6 +113,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse ump = ip->i_ump; lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; + reclaimed = 0; if (size > fs->fs_bsize) panic("ffs_balloc_ufs1: blk too big"); *bpp = NULL; @@ -276,6 +278,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse /* * Fetch through the indirect blocks, allocating as necessary. */ +retry: for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); @@ -296,8 +299,15 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse if (pref == 0) pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, - flags, cred, &newb)) != 0) { + flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); + if (++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } goto fail; } nb = newb; @@ -349,10 +359,17 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffse if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]); - error = ffs_alloc(ip, - lbn, pref, (int)fs->fs_bsize, flags, cred, &newb); + error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + flags | IO_BUFLOCKED, cred, &newb); if (error) { brelse(bp); + if (++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } goto fail; } nb = newb; @@ -506,6 +523,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse int deallocated, osize, nsize, num, i, error; int unwindidx = -1; int saved_inbdflush; + int reclaimed; ip = VTOI(vp); dp = ip->i_din2; @@ -513,6 +531,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse ump = ip->i_ump; lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; + reclaimed = 0; if (size > fs->fs_bsize) panic("ffs_balloc_ufs2: blk too big"); *bpp = NULL; @@ -787,6 +806,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse /* * Fetch through the indirect blocks, allocating as necessary. */ +retry: for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); @@ -807,8 +827,15 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse if (pref == 0) pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, - flags, cred, &newb)) != 0) { + flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); + if (++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } goto fail; } nb = newb; @@ -860,10 +887,17 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffse if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]); - error = ffs_alloc(ip, - lbn, pref, (int)fs->fs_bsize, flags, cred, &newb); + error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + flags | IO_BUFLOCKED, cred, &newb); if (error) { brelse(bp); + if (++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } goto fail; } nb = newb; Index: /usr/src/sys/ufs/ffs/ffs_inode.c =================================================================== --- /usr/src/sys/ufs/ffs/ffs_inode.c (revision 221878) +++ /usr/src/sys/ufs/ffs/ffs_inode.c (working copy) @@ -151,12 +151,12 @@ ffs_truncate(vp, length, flags, cred, td) ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR]; ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; ufs2_daddr_t count, blocksreleased = 0, datablocks; - void *cookie; struct bufobj *bo; struct fs *fs; struct buf *bp; struct ufsmount *ump; - int needextclean, softdepslowdown, extblocks; + int softdeptrunc, journaltrunc; + int needextclean, extblocks; int offset, size, level, nblocks; int i, error, allerror; off_t osize; @@ -165,7 +165,6 @@ ffs_truncate(vp, length, flags, cred, td) fs = ip->i_fs; ump = ip->i_ump; bo = &vp->v_bufobj; - cookie = NULL; ASSERT_VOP_LOCKED(vp, "ffs_truncate"); @@ -191,7 +190,10 @@ ffs_truncate(vp, length, flags, cred, td) */ allerror = 0; needextclean = 0; - softdepslowdown = DOINGSOFTDEP(vp) && softdep_slowdown(vp); + softdeptrunc = 0; + journaltrunc = DOINGSUJ(vp); + if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0) + softdeptrunc = !softdep_slowdown(vp); extblocks = 0; datablocks = DIP(ip, i_blocks); if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) { @@ -199,19 +201,24 @@ ffs_truncate(vp, length, flags, cred, td) datablocks -= extblocks; } if ((flags & IO_EXT) && extblocks > 0) { - if (DOINGSOFTDEP(vp) && softdepslowdown == 0 && length == 0) { + if (softdeptrunc) { if ((flags & IO_NORMAL) == 0) { softdep_setup_freeblocks(ip, length, IO_EXT); return (0); } needextclean = 1; + } else if (journaltrunc) { + if ((flags & IO_NORMAL) == 0) { + softdep_journal_freeblocks(ip, cred, length, + IO_EXT); + return (0); + } + needextclean = 1; } else { if (length != 0) panic("ffs_truncate: partial trunc of extdata"); if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) return (error); - if (DOINGSUJ(vp)) - cookie = softdep_setup_trunc(vp, length, flags); osize = ip->i_din2->di_extsize; ip->i_din2->di_blocks -= extblocks; #ifdef QUOTA @@ -300,7 +307,7 @@ ffs_truncate(vp, length, flags, cred, td) goto out; } if (DOINGSOFTDEP(vp)) { - if (length > 0 || softdepslowdown) { + if (softdeptrunc == 0 && journaltrunc == 0) { /* * If a file is only partially truncated, then * we have to clean up the data structures @@ -312,27 +319,29 @@ ffs_truncate(vp, length, flags, cred, td) */ if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) goto out; - /* - * We have to journal the truncation before we change - * any blocks so we don't leave the file partially - * truncated. - */ - if (DOINGSUJ(vp) && cookie == NULL) - cookie = softdep_setup_trunc(vp, length, flags); } else { #ifdef QUOTA - (void) chkdq(ip, -datablocks, NOCRED, 0); + /* XXX This is wrong for partial truncation. */ + if (length == 0) + (void) chkdq(ip, -datablocks, NOCRED, 0); #endif - softdep_setup_freeblocks(ip, length, needextclean ? - IO_EXT | IO_NORMAL : IO_NORMAL); + flags = IO_NORMAL | (needextclean ? IO_EXT: 0); + if (journaltrunc) + softdep_journal_freeblocks(ip, cred, length, + flags); + else + softdep_setup_freeblocks(ip, length, flags); ASSERT_VOP_LOCKED(vp, "ffs_truncate1"); - vinvalbuf(vp, needextclean ? 0 : V_NORMAL, 0, 0); + if (needextclean) + vinvalbuf(vp, V_ALT, 0, 0); + error = vtruncbuf(vp, cred, td, length, fs->fs_bsize); if (!needextclean) ffs_pages_remove(vp, 0, OFF_TO_IDX(lblktosize(fs, -extblocks))); - vnode_pager_setsize(vp, 0); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - error = ffs_update(vp, 0); + if (journaltrunc == 0) { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + error = ffs_update(vp, 0); + } goto out; } } @@ -411,13 +420,7 @@ ffs_truncate(vp, length, flags, cred, td) DIP_SET(ip, i_db[i], 0); } ip->i_flag |= IN_CHANGE | IN_UPDATE; - /* - * When doing softupdate journaling we must preserve the size along - * with the old pointers until they are freed or we might not - * know how many fragments remain. - */ - if (!DOINGSUJ(vp)) - allerror = ffs_update(vp, 1); + allerror = ffs_update(vp, 1); /* * Having written the new inode to disk, save its new configuration @@ -543,11 +546,6 @@ done: #endif error = allerror; out: - if (cookie) { - allerror = softdep_complete_trunc(vp, cookie); - if (allerror != 0 && error == 0) - error = allerror; - } return (error); } Index: /usr/src/sys/ufs/ffs/fs.h =================================================================== --- /usr/src/sys/ufs/ffs/fs.h (revision 221878) +++ /usr/src/sys/ufs/ffs/fs.h (working copy) @@ -715,7 +715,9 @@ struct jmvrec { /* * Block record. A set of frags or tree of blocks starting at an indirect are - * freed or a set of frags are allocated. + * freed or a set of frags are allocated. If indiroff is set, only a subset + * of the pointers in an indirect block are freed and the indirect itself + * is retained. */ struct jblkrec { uint32_t jb_op; @@ -724,7 +726,7 @@ struct jblkrec { ufs_lbn_t jb_lbn; uint16_t jb_frags; uint16_t jb_oldfrags; - uint32_t jb_unused; + uint32_t jb_indiroff; }; /* Index: /usr/src/sys/kern/vfs_bio.c =================================================================== --- /usr/src/sys/kern/vfs_bio.c (revision 221878) +++ /usr/src/sys/kern/vfs_bio.c (working copy) @@ -3999,10 +3999,11 @@ DB_SHOW_COMMAND(buffer, db_show_buffer) db_printf("b_flags = 0x%b\n", (u_int)bp->b_flags, PRINT_BUF_FLAGS); db_printf( "b_error = %d, b_bufsize = %ld, b_bcount = %ld, b_resid = %ld\n" - "b_bufobj = (%p), b_data = %p, b_blkno = %jd, b_dep = %p\n", + "b_bufobj = (%p), b_data = %p, b_blkno = %jd, b_lblkno = %jd, " + "b_dep = %p\n", bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid, bp->b_bufobj, bp->b_data, (intmax_t)bp->b_blkno, - bp->b_dep.lh_first); + (intmax_t)bp->b_lblkno, bp->b_dep.lh_first); if (bp->b_npages) { int i; db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages); Index: /usr/src/sys/kern/vfs_subr.c =================================================================== --- /usr/src/sys/kern/vfs_subr.c (revision 221878) +++ /usr/src/sys/kern/vfs_subr.c (working copy) @@ -1376,7 +1376,7 @@ restart: } } - if (length > 0) { + if (length > 0 && (vp->v_mount->mnt_kern_flag & MNTK_SUJ) == 0) { restartsync: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno > 0) Index: /usr/src/sys/sys/vnode.h =================================================================== --- /usr/src/sys/sys/vnode.h (revision 221878) +++ /usr/src/sys/sys/vnode.h (working copy) @@ -302,6 +302,7 @@ struct vattr { #define IO_EXT 0x0400 /* operate on external attributes */ #define IO_NORMAL 0x0800 /* operate on regular data */ #define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */ +#define IO_BUFLOCKED 0x2000 /* ffs flag; indir buf is locked */ #define IO_SEQMAX 0x7F /* seq heuristic max value */ #define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */ -- Test scenario: suj17.sh on a non SUJ FS