FreeBSD ZFS
The Zettabyte File System
|
00001 /* 00002 * CDDL HEADER START 00003 * 00004 * The contents of this file are subject to the terms of the 00005 * Common Development and Distribution License (the "License"). 00006 * You may not use this file except in compliance with the License. 00007 * 00008 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 00009 * or http://www.opensolaris.org/os/licensing. 00010 * See the License for the specific language governing permissions 00011 * and limitations under the License. 00012 * 00013 * When distributing Covered Code, include this CDDL HEADER in each 00014 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 00015 * If applicable, add the following below this CDDL HEADER, with the 00016 * fields enclosed by brackets "[]" replaced with your own identifying 00017 * information: Portions Copyright [yyyy] [name of copyright owner] 00018 * 00019 * CDDL HEADER END 00020 */ 00021 /* 00022 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 00023 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 00024 * All rights reserved. 00025 */ 00026 00027 /* Portions Copyright 2010 Robert Milkowski */ 00028 00029 #include <sys/types.h> 00030 #include <sys/param.h> 00031 #include <sys/systm.h> 00032 #include <sys/kernel.h> 00033 #include <sys/sysmacros.h> 00034 #include <sys/kmem.h> 00035 #include <sys/acl.h> 00036 #include <sys/vnode.h> 00037 #include <sys/vfs.h> 00038 #include <sys/mntent.h> 00039 #include <sys/mount.h> 00040 #include <sys/cmn_err.h> 00041 #include <sys/zfs_znode.h> 00042 #include <sys/zfs_dir.h> 00043 #include <sys/zil.h> 00044 #include <sys/fs/zfs.h> 00045 #include <sys/dmu.h> 00046 #include <sys/dsl_prop.h> 00047 #include <sys/dsl_dataset.h> 00048 #include <sys/dsl_deleg.h> 00049 #include <sys/spa.h> 00050 #include <sys/zap.h> 00051 #include <sys/sa.h> 00052 #include <sys/sa_impl.h> 00053 #include <sys/varargs.h> 00054 #include <sys/policy.h> 00055 #include <sys/atomic.h> 00056 #include <sys/zfs_ioctl.h> 00057 #include <sys/zfs_ctldir.h> 00058 #include <sys/zfs_fuid.h> 00059 #include <sys/sunddi.h> 00060 #include <sys/dnlc.h> 00061 #include <sys/dmu_objset.h> 00062 #include <sys/spa_boot.h> 00063 #include <sys/jail.h> 00064 #include "zfs_comutil.h" 00065 00066 struct mtx zfs_debug_mtx; 00067 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 00068 00069 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 00070 00071 int zfs_super_owner; 00072 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 00073 "File system owner can perform privileged operation on his file systems"); 00074 00078 int zfs_debug_level; 00079 TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 00080 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 00081 "Debug level"); 00082 00083 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 00084 static int zfs_version_acl = ZFS_ACL_VERSION; 00085 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 00086 "ZFS_ACL_VERSION"); 00087 static int zfs_version_spa = SPA_VERSION; 00088 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 00089 "SPA_VERSION"); 00090 static int zfs_version_zpl = ZPL_VERSION; 00091 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 00092 "ZPL_VERSION"); 00093 00094 static int zfs_mount(vfs_t *vfsp); 00095 static int zfs_umount(vfs_t *vfsp, int fflag); 00096 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 00097 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 00098 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 00099 static int zfs_sync(vfs_t *vfsp, int waitfor); 00100 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 00101 struct ucred **credanonp, int *numsecflavors, int **secflavors); 00102 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); 00103 static void zfs_objset_close(zfsvfs_t *zfsvfs); 00104 static void zfs_freevfs(vfs_t *vfsp); 00105 00106 static struct vfsops zfs_vfsops = { 00107 .vfs_mount = zfs_mount, 00108 .vfs_unmount = zfs_umount, 00109 .vfs_root = zfs_root, 00110 .vfs_statfs = zfs_statfs, 00111 .vfs_vget = zfs_vget, 00112 .vfs_sync = zfs_sync, 00113 .vfs_checkexp = zfs_checkexp, 00114 .vfs_fhtovp = zfs_fhtovp, 00115 }; 00116 00117 VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 00118 00124 static uint32_t zfs_active_fs_count = 0; 00125 00126 /*ARGSUSED*/ 00127 static int 00128 zfs_sync(vfs_t *vfsp, int waitfor) 00129 { 00130 00131 /* 00132 * Data integrity is job one. We don't want a compromised kernel 00133 * writing to the storage pool, so we never sync during panic. 00134 */ 00135 if (panicstr) 00136 return (0); 00137 00138 if (vfsp != NULL) { 00139 /* 00140 * Sync a specific filesystem. 00141 */ 00142 zfsvfs_t *zfsvfs = vfsp->vfs_data; 00143 dsl_pool_t *dp; 00144 int error; 00145 00146 error = vfs_stdsync(vfsp, waitfor); 00147 if (error != 0) 00148 return (error); 00149 00150 ZFS_ENTER(zfsvfs); 00151 dp = dmu_objset_pool(zfsvfs->z_os); 00152 00153 /* 00154 * If the system is shutting down, then skip any 00155 * filesystems which may exist on a suspended pool. 00156 */ 00157 if (sys_shutdown && spa_suspended(dp->dp_spa)) { 00158 ZFS_EXIT(zfsvfs); 00159 return (0); 00160 } 00161 00162 if (zfsvfs->z_log != NULL) 00163 zil_commit(zfsvfs->z_log, 0); 00164 00165 ZFS_EXIT(zfsvfs); 00166 } else { 00167 /* 00168 * Sync all ZFS filesystems. This is what happens when you 00169 * run sync(1M). Unlike other filesystems, ZFS honors the 00170 * request by waiting for all pools to commit all dirty data. 00171 */ 00172 spa_sync_allpools(); 00173 } 00174 00175 return (0); 00176 } 00177 00178 #ifndef __FreeBSD__ 00179 static int 00180 zfs_create_unique_device(dev_t *dev) 00181 { 00182 major_t new_major; 00183 00184 do { 00185 ASSERT3U(zfs_minor, <=, MAXMIN32); 00186 minor_t start = zfs_minor; 00187 do { 00188 mutex_enter(&zfs_dev_mtx); 00189 if (zfs_minor >= MAXMIN32) { 00190 /* 00191 * If we're still using the real major 00192 * keep out of /dev/zfs and /dev/zvol minor 00193 * number space. If we're using a getudev()'ed 00194 * major number, we can use all of its minors. 00195 */ 00196 if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 00197 zfs_minor = ZFS_MIN_MINOR; 00198 else 00199 zfs_minor = 0; 00200 } else { 00201 zfs_minor++; 00202 } 00203 *dev = makedevice(zfs_major, zfs_minor); 00204 mutex_exit(&zfs_dev_mtx); 00205 } while (vfs_devismounted(*dev) && zfs_minor != start); 00206 if (zfs_minor == start) { 00207 /* 00208 * We are using all ~262,000 minor numbers for the 00209 * current major number. Create a new major number. 00210 */ 00211 if ((new_major = getudev()) == (major_t)-1) { 00212 cmn_err(CE_WARN, 00213 "zfs_mount: Can't get unique major " 00214 "device number."); 00215 return (-1); 00216 } 00217 mutex_enter(&zfs_dev_mtx); 00218 zfs_major = new_major; 00219 zfs_minor = 0; 00220 00221 mutex_exit(&zfs_dev_mtx); 00222 } else { 00223 break; 00224 } 00225 /* CONSTANTCONDITION */ 00226 } while (1); 00227 00228 return (0); 00229 } 00230 #endif /* !__FreeBSD__ */ 00231 00232 static void 00233 atime_changed_cb(void *arg, uint64_t newval) 00234 { 00235 zfsvfs_t *zfsvfs = arg; 00236 00237 if (newval == TRUE) { 00238 zfsvfs->z_atime = TRUE; 00239 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 00240 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 00241 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 00242 } else { 00243 zfsvfs->z_atime = FALSE; 00244 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 00245 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 00246 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 00247 } 00248 } 00249 00250 static void 00251 xattr_changed_cb(void *arg, uint64_t newval) 00252 { 00253 zfsvfs_t *zfsvfs = arg; 00254 00255 if (newval == TRUE) { 00256 /* XXX locking on vfs_flag? */ 00257 #ifdef TODO 00258 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 00259 #endif 00260 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 00261 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 00262 } else { 00263 /* XXX locking on vfs_flag? */ 00264 #ifdef TODO 00265 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 00266 #endif 00267 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 00268 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 00269 } 00270 } 00271 00272 static void 00273 blksz_changed_cb(void *arg, uint64_t newval) 00274 { 00275 zfsvfs_t *zfsvfs = arg; 00276 00277 if (newval < SPA_MINBLOCKSIZE || 00278 newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 00279 newval = SPA_MAXBLOCKSIZE; 00280 00281 zfsvfs->z_max_blksz = newval; 00282 zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 00283 } 00284 00285 static void 00286 readonly_changed_cb(void *arg, uint64_t newval) 00287 { 00288 zfsvfs_t *zfsvfs = arg; 00289 00290 if (newval) { 00291 /* XXX locking on vfs_flag? */ 00292 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 00293 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 00294 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 00295 } else { 00296 /* XXX locking on vfs_flag? */ 00297 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 00298 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 00299 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 00300 } 00301 } 00302 00303 static void 00304 setuid_changed_cb(void *arg, uint64_t newval) 00305 { 00306 zfsvfs_t *zfsvfs = arg; 00307 00308 if (newval == FALSE) { 00309 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 00310 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 00311 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 00312 } else { 00313 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 00314 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 00315 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 00316 } 00317 } 00318 00319 static void 00320 exec_changed_cb(void *arg, uint64_t newval) 00321 { 00322 zfsvfs_t *zfsvfs = arg; 00323 00324 if (newval == FALSE) { 00325 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 00326 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 00327 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 00328 } else { 00329 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 00330 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 00331 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 00332 } 00333 } 00334 00343 static void 00344 nbmand_changed_cb(void *arg, uint64_t newval) 00345 { 00346 zfsvfs_t *zfsvfs = arg; 00347 if (newval == FALSE) { 00348 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 00349 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 00350 } else { 00351 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 00352 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 00353 } 00354 } 00355 00356 static void 00357 snapdir_changed_cb(void *arg, uint64_t newval) 00358 { 00359 zfsvfs_t *zfsvfs = arg; 00360 00361 zfsvfs->z_show_ctldir = newval; 00362 } 00363 00364 static void 00365 vscan_changed_cb(void *arg, uint64_t newval) 00366 { 00367 zfsvfs_t *zfsvfs = arg; 00368 00369 zfsvfs->z_vscan = newval; 00370 } 00371 00372 static void 00373 acl_mode_changed_cb(void *arg, uint64_t newval) 00374 { 00375 zfsvfs_t *zfsvfs = arg; 00376 00377 zfsvfs->z_acl_mode = newval; 00378 } 00379 00380 static void 00381 acl_inherit_changed_cb(void *arg, uint64_t newval) 00382 { 00383 zfsvfs_t *zfsvfs = arg; 00384 00385 zfsvfs->z_acl_inherit = newval; 00386 } 00387 00388 static int 00389 zfs_register_callbacks(vfs_t *vfsp) 00390 { 00391 struct dsl_dataset *ds = NULL; 00392 objset_t *os = NULL; 00393 zfsvfs_t *zfsvfs = NULL; 00394 uint64_t nbmand; 00395 int readonly, do_readonly = B_FALSE; 00396 int setuid, do_setuid = B_FALSE; 00397 int exec, do_exec = B_FALSE; 00398 int xattr, do_xattr = B_FALSE; 00399 int atime, do_atime = B_FALSE; 00400 int error = 0; 00401 00402 ASSERT(vfsp); 00403 zfsvfs = vfsp->vfs_data; 00404 ASSERT(zfsvfs); 00405 os = zfsvfs->z_os; 00406 00407 /* 00408 * This function can be called for a snapshot when we update snapshot's 00409 * mount point, which isn't really supported. 00410 */ 00411 if (dmu_objset_is_snapshot(os)) 00412 return (EOPNOTSUPP); 00413 00414 /* 00415 * The act of registering our callbacks will destroy any mount 00416 * options we may have. In order to enable temporary overrides 00417 * of mount options, we stash away the current values and 00418 * restore them after we register the callbacks. 00419 */ 00420 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || 00421 !spa_writeable(dmu_objset_spa(os))) { 00422 readonly = B_TRUE; 00423 do_readonly = B_TRUE; 00424 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 00425 readonly = B_FALSE; 00426 do_readonly = B_TRUE; 00427 } 00428 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 00429 setuid = B_FALSE; 00430 do_setuid = B_TRUE; 00431 } else { 00432 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 00433 setuid = B_FALSE; 00434 do_setuid = B_TRUE; 00435 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 00436 setuid = B_TRUE; 00437 do_setuid = B_TRUE; 00438 } 00439 } 00440 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 00441 exec = B_FALSE; 00442 do_exec = B_TRUE; 00443 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 00444 exec = B_TRUE; 00445 do_exec = B_TRUE; 00446 } 00447 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 00448 xattr = B_FALSE; 00449 do_xattr = B_TRUE; 00450 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 00451 xattr = B_TRUE; 00452 do_xattr = B_TRUE; 00453 } 00454 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 00455 atime = B_FALSE; 00456 do_atime = B_TRUE; 00457 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 00458 atime = B_TRUE; 00459 do_atime = B_TRUE; 00460 } 00461 00462 /* 00463 * nbmand is a special property. It can only be changed at 00464 * mount time. 00465 * 00466 * This is weird, but it is documented to only be changeable 00467 * at mount time. 00468 */ 00469 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 00470 nbmand = B_FALSE; 00471 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 00472 nbmand = B_TRUE; 00473 } else { 00474 char osname[MAXNAMELEN]; 00475 00476 dmu_objset_name(os, osname); 00477 if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 00478 NULL)) { 00479 return (error); 00480 } 00481 } 00482 00483 /* 00484 * Register property callbacks. 00485 * 00486 * It would probably be fine to just check for i/o error from 00487 * the first prop_register(), but I guess I like to go 00488 * overboard... 00489 */ 00490 ds = dmu_objset_ds(os); 00491 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 00492 error = error ? error : dsl_prop_register(ds, 00493 "xattr", xattr_changed_cb, zfsvfs); 00494 error = error ? error : dsl_prop_register(ds, 00495 "recordsize", blksz_changed_cb, zfsvfs); 00496 error = error ? error : dsl_prop_register(ds, 00497 "readonly", readonly_changed_cb, zfsvfs); 00498 error = error ? error : dsl_prop_register(ds, 00499 "setuid", setuid_changed_cb, zfsvfs); 00500 error = error ? error : dsl_prop_register(ds, 00501 "exec", exec_changed_cb, zfsvfs); 00502 error = error ? error : dsl_prop_register(ds, 00503 "snapdir", snapdir_changed_cb, zfsvfs); 00504 error = error ? error : dsl_prop_register(ds, 00505 "aclmode", acl_mode_changed_cb, zfsvfs); 00506 error = error ? error : dsl_prop_register(ds, 00507 "aclinherit", acl_inherit_changed_cb, zfsvfs); 00508 error = error ? error : dsl_prop_register(ds, 00509 "vscan", vscan_changed_cb, zfsvfs); 00510 if (error) 00511 goto unregister; 00512 00513 /* 00514 * Invoke our callbacks to restore temporary mount options. 00515 */ 00516 if (do_readonly) 00517 readonly_changed_cb(zfsvfs, readonly); 00518 if (do_setuid) 00519 setuid_changed_cb(zfsvfs, setuid); 00520 if (do_exec) 00521 exec_changed_cb(zfsvfs, exec); 00522 if (do_xattr) 00523 xattr_changed_cb(zfsvfs, xattr); 00524 if (do_atime) 00525 atime_changed_cb(zfsvfs, atime); 00526 00527 nbmand_changed_cb(zfsvfs, nbmand); 00528 00529 return (0); 00530 00531 unregister: 00532 /* 00533 * We may attempt to unregister some callbacks that are not 00534 * registered, but this is OK; it will simply return ENOMSG, 00535 * which we will ignore. 00536 */ 00537 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 00538 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 00539 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 00540 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 00541 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 00542 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 00543 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 00544 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 00545 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 00546 zfsvfs); 00547 (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 00548 return (error); 00549 00550 } 00551 00552 static int 00553 zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, 00554 uint64_t *userp, uint64_t *groupp) 00555 { 00556 int error = 0; 00557 00558 /* 00559 * Is it a valid type of object to track? 00560 */ 00561 if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) 00562 return (ENOENT); 00563 00564 /* 00565 * If we have a NULL data pointer 00566 * then assume the id's aren't changing and 00567 * return EEXIST to the dmu to let it know to 00568 * use the same ids 00569 */ 00570 if (data == NULL) 00571 return (EEXIST); 00572 00573 if (bonustype == DMU_OT_ZNODE) { 00574 znode_phys_t *znp = data; 00575 *userp = znp->zp_uid; 00576 *groupp = znp->zp_gid; 00577 } else { 00578 int hdrsize; 00579 sa_hdr_phys_t *sap = data; 00580 sa_hdr_phys_t sa = *sap; 00581 boolean_t swap = B_FALSE; 00582 00583 ASSERT(bonustype == DMU_OT_SA); 00584 00585 if (sa.sa_magic == 0) { 00586 /* 00587 * This should only happen for newly created 00588 * files that haven't had the znode data filled 00589 * in yet. 00590 */ 00591 *userp = 0; 00592 *groupp = 0; 00593 return (0); 00594 } 00595 if (sa.sa_magic == BSWAP_32(SA_MAGIC)) { 00596 sa.sa_magic = SA_MAGIC; 00597 sa.sa_layout_info = BSWAP_16(sa.sa_layout_info); 00598 swap = B_TRUE; 00599 } else { 00600 VERIFY3U(sa.sa_magic, ==, SA_MAGIC); 00601 } 00602 00603 hdrsize = sa_hdrsize(&sa); 00604 VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t)); 00605 *userp = *((uint64_t *)((uintptr_t)data + hdrsize + 00606 SA_UID_OFFSET)); 00607 *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + 00608 SA_GID_OFFSET)); 00609 if (swap) { 00610 *userp = BSWAP_64(*userp); 00611 *groupp = BSWAP_64(*groupp); 00612 } 00613 } 00614 return (error); 00615 } 00616 00617 static void 00618 fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, 00619 char *domainbuf, int buflen, uid_t *ridp) 00620 { 00621 uint64_t fuid; 00622 const char *domain; 00623 00624 fuid = strtonum(fuidstr, NULL); 00625 00626 domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); 00627 if (domain) 00628 (void) strlcpy(domainbuf, domain, buflen); 00629 else 00630 domainbuf[0] = '\0'; 00631 *ridp = FUID_RID(fuid); 00632 } 00633 00634 static uint64_t 00635 zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) 00636 { 00637 switch (type) { 00638 case ZFS_PROP_USERUSED: 00639 return (DMU_USERUSED_OBJECT); 00640 case ZFS_PROP_GROUPUSED: 00641 return (DMU_GROUPUSED_OBJECT); 00642 case ZFS_PROP_USERQUOTA: 00643 return (zfsvfs->z_userquota_obj); 00644 case ZFS_PROP_GROUPQUOTA: 00645 return (zfsvfs->z_groupquota_obj); 00646 } 00647 return (0); 00648 } 00649 00650 int 00651 zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 00652 uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) 00653 { 00654 int error; 00655 zap_cursor_t zc; 00656 zap_attribute_t za; 00657 zfs_useracct_t *buf = vbuf; 00658 uint64_t obj; 00659 00660 if (!dmu_objset_userspace_present(zfsvfs->z_os)) 00661 return (ENOTSUP); 00662 00663 obj = zfs_userquota_prop_to_obj(zfsvfs, type); 00664 if (obj == 0) { 00665 *bufsizep = 0; 00666 return (0); 00667 } 00668 00669 for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); 00670 (error = zap_cursor_retrieve(&zc, &za)) == 0; 00671 zap_cursor_advance(&zc)) { 00672 if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > 00673 *bufsizep) 00674 break; 00675 00676 fuidstr_to_sid(zfsvfs, za.za_name, 00677 buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); 00678 00679 buf->zu_space = za.za_first_integer; 00680 buf++; 00681 } 00682 if (error == ENOENT) 00683 error = 0; 00684 00685 ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); 00686 *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; 00687 *cookiep = zap_cursor_serialize(&zc); 00688 zap_cursor_fini(&zc); 00689 return (error); 00690 } 00691 00695 static int 00696 id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, 00697 char *buf, boolean_t addok) 00698 { 00699 uint64_t fuid; 00700 int domainid = 0; 00701 00702 if (domain && domain[0]) { 00703 domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); 00704 if (domainid == -1) 00705 return (ENOENT); 00706 } 00707 fuid = FUID_ENCODE(domainid, rid); 00708 (void) sprintf(buf, "%llx", (longlong_t)fuid); 00709 return (0); 00710 } 00711 00712 int 00713 zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 00714 const char *domain, uint64_t rid, uint64_t *valp) 00715 { 00716 char buf[32]; 00717 int err; 00718 uint64_t obj; 00719 00720 *valp = 0; 00721 00722 if (!dmu_objset_userspace_present(zfsvfs->z_os)) 00723 return (ENOTSUP); 00724 00725 obj = zfs_userquota_prop_to_obj(zfsvfs, type); 00726 if (obj == 0) 00727 return (0); 00728 00729 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); 00730 if (err) 00731 return (err); 00732 00733 err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); 00734 if (err == ENOENT) 00735 err = 0; 00736 return (err); 00737 } 00738 00739 int 00740 zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 00741 const char *domain, uint64_t rid, uint64_t quota) 00742 { 00743 char buf[32]; 00744 int err; 00745 dmu_tx_t *tx; 00746 uint64_t *objp; 00747 boolean_t fuid_dirtied; 00748 00749 if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) 00750 return (EINVAL); 00751 00752 if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) 00753 return (ENOTSUP); 00754 00755 objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : 00756 &zfsvfs->z_groupquota_obj; 00757 00758 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); 00759 if (err) 00760 return (err); 00761 fuid_dirtied = zfsvfs->z_fuid_dirty; 00762 00763 tx = dmu_tx_create(zfsvfs->z_os); 00764 dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); 00765 if (*objp == 0) { 00766 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 00767 zfs_userquota_prop_prefixes[type]); 00768 } 00769 if (fuid_dirtied) 00770 zfs_fuid_txhold(zfsvfs, tx); 00771 err = dmu_tx_assign(tx, TXG_WAIT); 00772 if (err) { 00773 dmu_tx_abort(tx); 00774 return (err); 00775 } 00776 00777 mutex_enter(&zfsvfs->z_lock); 00778 if (*objp == 0) { 00779 *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, 00780 DMU_OT_NONE, 0, tx); 00781 VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 00782 zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); 00783 } 00784 mutex_exit(&zfsvfs->z_lock); 00785 00786 if (quota == 0) { 00787 err = zap_remove(zfsvfs->z_os, *objp, buf, tx); 00788 if (err == ENOENT) 00789 err = 0; 00790 } else { 00791 err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); 00792 } 00793 ASSERT(err == 0); 00794 if (fuid_dirtied) 00795 zfs_fuid_sync(zfsvfs, tx); 00796 dmu_tx_commit(tx); 00797 return (err); 00798 } 00799 00800 boolean_t 00801 zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) 00802 { 00803 char buf[32]; 00804 uint64_t used, quota, usedobj, quotaobj; 00805 int err; 00806 00807 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 00808 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 00809 00810 if (quotaobj == 0 || zfsvfs->z_replay) 00811 return (B_FALSE); 00812 00813 (void) sprintf(buf, "%llx", (longlong_t)fuid); 00814 err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a); 00815 if (err != 0) 00816 return (B_FALSE); 00817 00818 err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); 00819 if (err != 0) 00820 return (B_FALSE); 00821 return (used >= quota); 00822 } 00823 00824 boolean_t 00825 zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup) 00826 { 00827 uint64_t fuid; 00828 uint64_t quotaobj; 00829 00830 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 00831 00832 fuid = isgroup ? zp->z_gid : zp->z_uid; 00833 00834 if (quotaobj == 0 || zfsvfs->z_replay) 00835 return (B_FALSE); 00836 00837 return (zfs_fuid_overquota(zfsvfs, isgroup, fuid)); 00838 } 00839 00840 int 00841 zfsvfs_create(const char *osname, zfsvfs_t **zfvp) 00842 { 00843 objset_t *os; 00844 zfsvfs_t *zfsvfs; 00845 uint64_t zval; 00846 int i, error; 00847 uint64_t sa_obj; 00848 00849 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 00850 00851 /* 00852 * We claim to always be readonly so we can open snapshots; 00853 * other ZPL code will prevent us from writing to snapshots. 00854 */ 00855 error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); 00856 if (error) { 00857 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 00858 return (error); 00859 } 00860 00861 /* 00862 * Initialize the zfs-specific filesystem structure. 00863 * Should probably make this a kmem cache, shuffle fields, 00864 * and just bzero up to z_hold_mtx[]. 00865 */ 00866 zfsvfs->z_vfs = NULL; 00867 zfsvfs->z_parent = zfsvfs; 00868 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 00869 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 00870 zfsvfs->z_os = os; 00871 00872 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 00873 if (error) { 00874 goto out; 00875 } else if (zfsvfs->z_version > 00876 zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { 00877 (void) printf("Can't mount a version %lld file system " 00878 "on a version %lld pool\n. Pool must be upgraded to mount " 00879 "this file system.", (u_longlong_t)zfsvfs->z_version, 00880 (u_longlong_t)spa_version(dmu_objset_spa(os))); 00881 error = ENOTSUP; 00882 goto out; 00883 } 00884 if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) 00885 goto out; 00886 zfsvfs->z_norm = (int)zval; 00887 00888 if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) 00889 goto out; 00890 zfsvfs->z_utf8 = (zval != 0); 00891 00892 if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) 00893 goto out; 00894 zfsvfs->z_case = (uint_t)zval; 00895 00896 /* 00897 * Fold case on file systems that are always or sometimes case 00898 * insensitive. 00899 */ 00900 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 00901 zfsvfs->z_case == ZFS_CASE_MIXED) 00902 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 00903 00904 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 00905 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 00906 00907 if (zfsvfs->z_use_sa) { 00908 /* should either have both of these objects or none */ 00909 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, 00910 &sa_obj); 00911 if (error) 00912 return (error); 00913 } else { 00914 /* 00915 * Pre SA versions file systems should never touch 00916 * either the attribute registration or layout objects. 00917 */ 00918 sa_obj = 0; 00919 } 00920 00921 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 00922 &zfsvfs->z_attr_table); 00923 if (error) 00924 goto out; 00925 00926 if (zfsvfs->z_version >= ZPL_VERSION_SA) 00927 sa_register_update_callback(os, zfs_sa_upgrade); 00928 00929 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 00930 &zfsvfs->z_root); 00931 if (error) 00932 goto out; 00933 ASSERT(zfsvfs->z_root != 0); 00934 00935 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 00936 &zfsvfs->z_unlinkedobj); 00937 if (error) 00938 goto out; 00939 00940 error = zap_lookup(os, MASTER_NODE_OBJ, 00941 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 00942 8, 1, &zfsvfs->z_userquota_obj); 00943 if (error && error != ENOENT) 00944 goto out; 00945 00946 error = zap_lookup(os, MASTER_NODE_OBJ, 00947 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 00948 8, 1, &zfsvfs->z_groupquota_obj); 00949 if (error && error != ENOENT) 00950 goto out; 00951 00952 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 00953 &zfsvfs->z_fuid_obj); 00954 if (error && error != ENOENT) 00955 goto out; 00956 00957 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 00958 &zfsvfs->z_shares_dir); 00959 if (error && error != ENOENT) 00960 goto out; 00961 00962 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 00963 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 00964 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 00965 offsetof(znode_t, z_link_node)); 00966 rrw_init(&zfsvfs->z_teardown_lock); 00967 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 00968 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 00969 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 00970 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 00971 00972 *zfvp = zfsvfs; 00973 return (0); 00974 00975 out: 00976 dmu_objset_disown(os, zfsvfs); 00977 *zfvp = NULL; 00978 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 00979 return (error); 00980 } 00981 00982 static int 00983 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 00984 { 00985 int error; 00986 00987 error = zfs_register_callbacks(zfsvfs->z_vfs); 00988 if (error) 00989 return (error); 00990 00991 /* 00992 * Set the objset user_ptr to track its zfsvfs. 00993 */ 00994 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 00995 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 00996 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 00997 00998 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 00999 01000 /* 01001 * If we are not mounting (ie: online recv), then we don't 01002 * have to worry about replaying the log as we blocked all 01003 * operations out since we closed the ZIL. 01004 */ 01005 if (mounting) { 01006 boolean_t readonly; 01007 01008 /* 01009 * During replay we remove the read only flag to 01010 * allow replays to succeed. 01011 */ 01012 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 01013 if (readonly != 0) 01014 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 01015 else 01016 zfs_unlinked_drain(zfsvfs); 01017 01018 /* 01019 * Parse and replay the intent log. 01020 * 01021 * Because of ziltest, this must be done after 01022 * zfs_unlinked_drain(). (Further note: ziltest 01023 * doesn't use readonly mounts, where 01024 * zfs_unlinked_drain() isn't called.) This is because 01025 * ziltest causes spa_sync() to think it's committed, 01026 * but actually it is not, so the intent log contains 01027 * many txg's worth of changes. 01028 * 01029 * In particular, if object N is in the unlinked set in 01030 * the last txg to actually sync, then it could be 01031 * actually freed in a later txg and then reallocated 01032 * in a yet later txg. This would write a "create 01033 * object N" record to the intent log. Normally, this 01034 * would be fine because the spa_sync() would have 01035 * written out the fact that object N is free, before 01036 * we could write the "create object N" intent log 01037 * record. 01038 * 01039 * But when we are in ziltest mode, we advance the "open 01040 * txg" without actually spa_sync()-ing the changes to 01041 * disk. So we would see that object N is still 01042 * allocated and in the unlinked set, and there is an 01043 * intent log record saying to allocate it. 01044 */ 01045 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { 01046 if (zil_replay_disable) { 01047 zil_destroy(zfsvfs->z_log, B_FALSE); 01048 } else { 01049 zfsvfs->z_replay = B_TRUE; 01050 zil_replay(zfsvfs->z_os, zfsvfs, 01051 zfs_replay_vector); 01052 zfsvfs->z_replay = B_FALSE; 01053 } 01054 } 01055 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 01056 } 01057 01058 return (0); 01059 } 01060 01061 extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ 01062 01063 void 01064 zfsvfs_free(zfsvfs_t *zfsvfs) 01065 { 01066 int i; 01067 01068 /* 01069 * This is a barrier to prevent the filesystem from going away in 01070 * zfs_znode_move() until we can safely ensure that the filesystem is 01071 * not unmounted. We consider the filesystem valid before the barrier 01072 * and invalid after the barrier. 01073 */ 01074 rw_enter(&zfsvfs_lock, RW_READER); 01075 rw_exit(&zfsvfs_lock); 01076 01077 zfs_fuid_destroy(zfsvfs); 01078 01079 mutex_destroy(&zfsvfs->z_znodes_lock); 01080 mutex_destroy(&zfsvfs->z_lock); 01081 list_destroy(&zfsvfs->z_all_znodes); 01082 rrw_destroy(&zfsvfs->z_teardown_lock); 01083 rw_destroy(&zfsvfs->z_teardown_inactive_lock); 01084 rw_destroy(&zfsvfs->z_fuid_lock); 01085 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 01086 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 01087 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 01088 } 01089 01090 static void 01091 zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 01092 { 01093 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 01094 if (zfsvfs->z_vfs) { 01095 if (zfsvfs->z_use_fuids) { 01096 vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 01097 vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 01098 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 01099 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 01100 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); 01101 vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE); 01102 } else { 01103 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 01104 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 01105 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 01106 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 01107 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); 01108 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE); 01109 } 01110 } 01111 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 01112 } 01113 01114 static int 01115 zfs_domount(vfs_t *vfsp, char *osname) 01116 { 01117 uint64_t recordsize, fsid_guid; 01118 int error = 0; 01119 zfsvfs_t *zfsvfs; 01120 vnode_t *vp; 01121 01122 ASSERT(vfsp); 01123 ASSERT(osname); 01124 01125 error = zfsvfs_create(osname, &zfsvfs); 01126 if (error) 01127 return (error); 01128 zfsvfs->z_vfs = vfsp; 01129 01130 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 01131 NULL)) 01132 goto out; 01133 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 01134 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 01135 01136 vfsp->vfs_data = zfsvfs; 01137 vfsp->mnt_flag |= MNT_LOCAL; 01138 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 01139 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 01140 vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; 01141 01142 /* 01143 * The fsid is 64 bits, composed of an 8-bit fs type, which 01144 * separates our fsid from any other filesystem types, and a 01145 * 56-bit objset unique ID. The objset unique ID is unique to 01146 * all objsets open on this system, provided by unique_create(). 01147 * The 8-bit fs type must be put in the low bits of fsid[1] 01148 * because that's where other Solaris filesystems put it. 01149 */ 01150 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 01151 ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); 01152 vfsp->vfs_fsid.val[0] = fsid_guid; 01153 vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | 01154 vfsp->mnt_vfc->vfc_typenum & 0xFF; 01155 01156 /* 01157 * Set features for file system. 01158 */ 01159 zfs_set_fuid_feature(zfsvfs); 01160 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 01161 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 01162 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 01163 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 01164 } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 01165 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 01166 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 01167 } 01168 vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED); 01169 01170 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 01171 uint64_t pval; 01172 01173 atime_changed_cb(zfsvfs, B_FALSE); 01174 readonly_changed_cb(zfsvfs, B_TRUE); 01175 if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 01176 goto out; 01177 xattr_changed_cb(zfsvfs, pval); 01178 zfsvfs->z_issnap = B_TRUE; 01179 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; 01180 01181 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 01182 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 01183 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 01184 } else { 01185 error = zfsvfs_setup(zfsvfs, B_TRUE); 01186 } 01187 01188 vfs_mountedfrom(vfsp, osname); 01189 /* Grab extra reference. */ 01190 VERIFY(VFS_ROOT(vfsp, LK_EXCLUSIVE, &vp) == 0); 01191 VOP_UNLOCK(vp, 0); 01192 01193 if (!zfsvfs->z_issnap) 01194 zfsctl_create(zfsvfs); 01195 out: 01196 if (error) { 01197 dmu_objset_disown(zfsvfs->z_os, zfsvfs); 01198 zfsvfs_free(zfsvfs); 01199 } else { 01200 atomic_add_32(&zfs_active_fs_count, 1); 01201 } 01202 01203 return (error); 01204 } 01205 01206 void 01207 zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 01208 { 01209 objset_t *os = zfsvfs->z_os; 01210 struct dsl_dataset *ds; 01211 01212 /* 01213 * Unregister properties. 01214 */ 01215 if (!dmu_objset_is_snapshot(os)) { 01216 ds = dmu_objset_ds(os); 01217 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 01218 zfsvfs) == 0); 01219 01220 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 01221 zfsvfs) == 0); 01222 01223 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 01224 zfsvfs) == 0); 01225 01226 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 01227 zfsvfs) == 0); 01228 01229 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 01230 zfsvfs) == 0); 01231 01232 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 01233 zfsvfs) == 0); 01234 01235 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 01236 zfsvfs) == 0); 01237 01238 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 01239 zfsvfs) == 0); 01240 01241 VERIFY(dsl_prop_unregister(ds, "aclinherit", 01242 acl_inherit_changed_cb, zfsvfs) == 0); 01243 01244 VERIFY(dsl_prop_unregister(ds, "vscan", 01245 vscan_changed_cb, zfsvfs) == 0); 01246 } 01247 } 01248 01249 #ifdef SECLABEL 01250 01253 static int 01254 str_to_uint64(char *str, uint64_t *objnum) 01255 { 01256 uint64_t num = 0; 01257 01258 while (*str) { 01259 if (*str < '0' || *str > '9') 01260 return (EINVAL); 01261 01262 num = num*10 + *str++ - '0'; 01263 } 01264 01265 *objnum = num; 01266 return (0); 01267 } 01268 01274 static int 01275 zfs_parse_bootfs(char *bpath, char *outpath) 01276 { 01277 char *slashp; 01278 uint64_t objnum; 01279 int error; 01280 01281 if (*bpath == 0 || *bpath == '/') 01282 return (EINVAL); 01283 01284 (void) strcpy(outpath, bpath); 01285 01286 slashp = strchr(bpath, '/'); 01287 01288 /* if no '/', just return the pool name */ 01289 if (slashp == NULL) { 01290 return (0); 01291 } 01292 01293 /* if not a number, just return the root dataset name */ 01294 if (str_to_uint64(slashp+1, &objnum)) { 01295 return (0); 01296 } 01297 01298 *slashp = '\0'; 01299 error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 01300 *slashp = '/'; 01301 01302 return (error); 01303 } 01304 01313 int 01314 zfs_check_global_label(const char *dsname, const char *hexsl) 01315 { 01316 if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) 01317 return (0); 01318 if (strcasecmp(hexsl, ADMIN_HIGH) == 0) 01319 return (0); 01320 if (strcasecmp(hexsl, ADMIN_LOW) == 0) { 01321 /* must be readonly */ 01322 uint64_t rdonly; 01323 01324 if (dsl_prop_get_integer(dsname, 01325 zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) 01326 return (EACCES); 01327 return (rdonly ? 0 : EACCES); 01328 } 01329 return (EACCES); 01330 } 01331 01340 static int 01341 zfs_mount_label_policy(vfs_t *vfsp, char *osname) 01342 { 01343 int error, retv; 01344 zone_t *mntzone = NULL; 01345 ts_label_t *mnt_tsl; 01346 bslabel_t *mnt_sl; 01347 bslabel_t ds_sl; 01348 char ds_hexsl[MAXNAMELEN]; 01349 01350 retv = EACCES; /* assume the worst */ 01351 01352 /* 01353 * Start by getting the dataset label if it exists. 01354 */ 01355 error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 01356 1, sizeof (ds_hexsl), &ds_hexsl, NULL); 01357 if (error) 01358 return (EACCES); 01359 01360 /* 01361 * If labeling is NOT enabled, then disallow the mount of datasets 01362 * which have a non-default label already. No other label checks 01363 * are needed. 01364 */ 01365 if (!is_system_labeled()) { 01366 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) 01367 return (0); 01368 return (EACCES); 01369 } 01370 01371 /* 01372 * Get the label of the mountpoint. If mounting into the global 01373 * zone (i.e. mountpoint is not within an active zone and the 01374 * zoned property is off), the label must be default or 01375 * admin_low/admin_high only; no other checks are needed. 01376 */ 01377 mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE); 01378 if (mntzone->zone_id == GLOBAL_ZONEID) { 01379 uint64_t zoned; 01380 01381 zone_rele(mntzone); 01382 01383 if (dsl_prop_get_integer(osname, 01384 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) 01385 return (EACCES); 01386 if (!zoned) 01387 return (zfs_check_global_label(osname, ds_hexsl)); 01388 else 01389 /* 01390 * This is the case of a zone dataset being mounted 01391 * initially, before the zone has been fully created; 01392 * allow this mount into global zone. 01393 */ 01394 return (0); 01395 } 01396 01397 mnt_tsl = mntzone->zone_slabel; 01398 ASSERT(mnt_tsl != NULL); 01399 label_hold(mnt_tsl); 01400 mnt_sl = label2bslabel(mnt_tsl); 01401 01402 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) { 01403 /* 01404 * The dataset doesn't have a real label, so fabricate one. 01405 */ 01406 char *str = NULL; 01407 01408 if (l_to_str_internal(mnt_sl, &str) == 0 && 01409 dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 01410 ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0) 01411 retv = 0; 01412 if (str != NULL) 01413 kmem_free(str, strlen(str) + 1); 01414 } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) { 01415 /* 01416 * Now compare labels to complete the MAC check. If the 01417 * labels are equal then allow access. If the mountpoint 01418 * label dominates the dataset label, allow readonly access. 01419 * Otherwise, access is denied. 01420 */ 01421 if (blequal(mnt_sl, &ds_sl)) 01422 retv = 0; 01423 else if (bldominates(mnt_sl, &ds_sl)) { 01424 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 01425 retv = 0; 01426 } 01427 } 01428 01429 label_rele(mnt_tsl); 01430 zone_rele(mntzone); 01431 return (retv); 01432 } 01433 #endif /* SECLABEL */ 01434 01435 #ifdef OPENSOLARIS_MOUNTROOT 01436 static int 01437 zfs_mountroot(vfs_t *vfsp, enum whymountroot why) 01438 { 01439 int error = 0; 01440 static int zfsrootdone = 0; 01441 zfsvfs_t *zfsvfs = NULL; 01442 znode_t *zp = NULL; 01443 vnode_t *vp = NULL; 01444 char *zfs_bootfs; 01445 char *zfs_devid; 01446 01447 ASSERT(vfsp); 01448 01449 /* 01450 * The filesystem that we mount as root is defined in the 01451 * boot property "zfs-bootfs" with a format of 01452 * "poolname/root-dataset-objnum". 01453 */ 01454 if (why == ROOT_INIT) { 01455 if (zfsrootdone++) 01456 return (EBUSY); 01457 /* 01458 * the process of doing a spa_load will require the 01459 * clock to be set before we could (for example) do 01460 * something better by looking at the timestamp on 01461 * an uberblock, so just set it to -1. 01462 */ 01463 clkset(-1); 01464 01465 if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) { 01466 cmn_err(CE_NOTE, "spa_get_bootfs: can not get " 01467 "bootfs name"); 01468 return (EINVAL); 01469 } 01470 zfs_devid = spa_get_bootprop("diskdevid"); 01471 error = spa_import_rootpool(rootfs.bo_name, zfs_devid); 01472 if (zfs_devid) 01473 spa_free_bootprop(zfs_devid); 01474 if (error) { 01475 spa_free_bootprop(zfs_bootfs); 01476 cmn_err(CE_NOTE, "spa_import_rootpool: error %d", 01477 error); 01478 return (error); 01479 } 01480 if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) { 01481 spa_free_bootprop(zfs_bootfs); 01482 cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d", 01483 error); 01484 return (error); 01485 } 01486 01487 spa_free_bootprop(zfs_bootfs); 01488 01489 if (error = vfs_lock(vfsp)) 01490 return (error); 01491 01492 if (error = zfs_domount(vfsp, rootfs.bo_name)) { 01493 cmn_err(CE_NOTE, "zfs_domount: error %d", error); 01494 goto out; 01495 } 01496 01497 zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 01498 ASSERT(zfsvfs); 01499 if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) { 01500 cmn_err(CE_NOTE, "zfs_zget: error %d", error); 01501 goto out; 01502 } 01503 01504 vp = ZTOV(zp); 01505 mutex_enter(&vp->v_lock); 01506 vp->v_flag |= VROOT; 01507 mutex_exit(&vp->v_lock); 01508 rootvp = vp; 01509 01510 /* 01511 * Leave rootvp held. The root file system is never unmounted. 01512 */ 01513 01514 vfs_add((struct vnode *)0, vfsp, 01515 (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 01516 out: 01517 vfs_unlock(vfsp); 01518 return (error); 01519 } else if (why == ROOT_REMOUNT) { 01520 readonly_changed_cb(vfsp->vfs_data, B_FALSE); 01521 vfsp->vfs_flag |= VFS_REMOUNT; 01522 01523 /* refresh mount options */ 01524 zfs_unregister_callbacks(vfsp->vfs_data); 01525 return (zfs_register_callbacks(vfsp)); 01526 01527 } else if (why == ROOT_UNMOUNT) { 01528 zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 01529 (void) zfs_sync(vfsp, 0, 0); 01530 return (0); 01531 } 01532 01533 /* 01534 * if "why" is equal to anything else other than ROOT_INIT, 01535 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 01536 */ 01537 return (ENOTSUP); 01538 } 01539 #endif /* OPENSOLARIS_MOUNTROOT */ 01540 01541 static int 01542 getpoolname(const char *osname, char *poolname) 01543 { 01544 char *p; 01545 01546 p = strchr(osname, '/'); 01547 if (p == NULL) { 01548 if (strlen(osname) >= MAXNAMELEN) 01549 return (ENAMETOOLONG); 01550 (void) strcpy(poolname, osname); 01551 } else { 01552 if (p - osname >= MAXNAMELEN) 01553 return (ENAMETOOLONG); 01554 (void) strncpy(poolname, osname, p - osname); 01555 poolname[p - osname] = '\0'; 01556 } 01557 return (0); 01558 } 01559 01560 /*ARGSUSED*/ 01561 static int 01562 zfs_mount(vfs_t *vfsp) 01563 { 01564 kthread_t *td = curthread; 01565 vnode_t *mvp = vfsp->mnt_vnodecovered; 01566 cred_t *cr = td->td_ucred; 01567 char *osname; 01568 int error = 0; 01569 int canwrite; 01570 01571 if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_ZFS)) 01572 return (EPERM); 01573 01574 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 01575 return (EINVAL); 01576 01577 /* 01578 * If full-owner-access is enabled and delegated administration is 01579 * turned on, we must set nosuid. 01580 */ 01581 if (zfs_super_owner && 01582 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 01583 secpolicy_fs_mount_clearopts(cr, vfsp); 01584 } 01585 01586 /* 01587 * Check for mount privilege? 01588 * 01589 * If we don't have privilege then see if 01590 * we have local permission to allow it 01591 */ 01592 error = secpolicy_fs_mount(cr, mvp, vfsp); 01593 if (error) { 01594 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) 01595 goto out; 01596 01597 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 01598 vattr_t vattr; 01599 01600 /* 01601 * Make sure user is the owner of the mount point 01602 * or has sufficient privileges. 01603 */ 01604 01605 vattr.va_mask = AT_UID; 01606 01607 vn_lock(mvp, LK_SHARED | LK_RETRY); 01608 if (VOP_GETATTR(mvp, &vattr, cr)) { 01609 VOP_UNLOCK(mvp, 0); 01610 goto out; 01611 } 01612 01613 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 01614 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 01615 VOP_UNLOCK(mvp, 0); 01616 goto out; 01617 } 01618 VOP_UNLOCK(mvp, 0); 01619 } 01620 01621 secpolicy_fs_mount_clearopts(cr, vfsp); 01622 } 01623 01624 /* 01625 * Refuse to mount a filesystem if we are in a local zone and the 01626 * dataset is not visible. 01627 */ 01628 if (!INGLOBALZONE(curthread) && 01629 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 01630 error = EPERM; 01631 goto out; 01632 } 01633 01634 #ifdef SECLABEL 01635 error = zfs_mount_label_policy(vfsp, osname); 01636 if (error) 01637 goto out; 01638 #endif 01639 01640 vfsp->vfs_flag |= MNT_NFS4ACLS; 01641 01642 /* 01643 * When doing a remount, we simply refresh our temporary properties 01644 * according to those options set in the current VFS options. 01645 */ 01646 if (vfsp->vfs_flag & MS_REMOUNT) { 01647 /* refresh mount options */ 01648 zfs_unregister_callbacks(vfsp->vfs_data); 01649 error = zfs_register_callbacks(vfsp); 01650 goto out; 01651 } 01652 01653 /* Initial root mount: try hard to import the requested root pool. */ 01654 if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 && 01655 (vfsp->vfs_flag & MNT_UPDATE) == 0) { 01656 char pname[MAXNAMELEN]; 01657 01658 error = getpoolname(osname, pname); 01659 if (error == 0) 01660 error = spa_import_rootpool(pname); 01661 if (error) 01662 goto out; 01663 } 01664 DROP_GIANT(); 01665 error = zfs_domount(vfsp, osname); 01666 PICKUP_GIANT(); 01667 01668 #ifdef sun 01669 /* 01670 * Add an extra VFS_HOLD on our parent vfs so that it can't 01671 * disappear due to a forced unmount. 01672 */ 01673 if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) 01674 VFS_HOLD(mvp->v_vfsp); 01675 #endif /* sun */ 01676 01677 out: 01678 return (error); 01679 } 01680 01681 static int 01682 zfs_statfs(vfs_t *vfsp, struct statfs *statp) 01683 { 01684 zfsvfs_t *zfsvfs = vfsp->vfs_data; 01685 uint64_t refdbytes, availbytes, usedobjs, availobjs; 01686 01687 statp->f_version = STATFS_VERSION; 01688 01689 ZFS_ENTER(zfsvfs); 01690 01691 dmu_objset_space(zfsvfs->z_os, 01692 &refdbytes, &availbytes, &usedobjs, &availobjs); 01693 01694 /* 01695 * The underlying storage pool actually uses multiple block sizes. 01696 * We report the fragsize as the smallest block size we support, 01697 * and we report our blocksize as the filesystem's maximum blocksize. 01698 */ 01699 statp->f_bsize = SPA_MINBLOCKSIZE; 01700 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 01701 01702 /* 01703 * The following report "total" blocks of various kinds in the 01704 * file system, but reported in terms of f_frsize - the 01705 * "fragment" size. 01706 */ 01707 01708 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 01709 statp->f_bfree = availbytes / statp->f_bsize; 01710 statp->f_bavail = statp->f_bfree; /* no root reservation */ 01711 01712 /* 01713 * statvfs() should really be called statufs(), because it assumes 01714 * static metadata. ZFS doesn't preallocate files, so the best 01715 * we can do is report the max that could possibly fit in f_files, 01716 * and that minus the number actually used in f_ffree. 01717 * For f_ffree, report the smaller of the number of object available 01718 * and the number of blocks (each object will take at least a block). 01719 */ 01720 statp->f_ffree = MIN(availobjs, statp->f_bfree); 01721 statp->f_files = statp->f_ffree + usedobjs; 01722 01723 /* 01724 * We're a zfs filesystem. 01725 */ 01726 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 01727 01728 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 01729 sizeof(statp->f_mntfromname)); 01730 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 01731 sizeof(statp->f_mntonname)); 01732 01733 statp->f_namemax = ZFS_MAXNAMELEN; 01734 01735 ZFS_EXIT(zfsvfs); 01736 return (0); 01737 } 01738 01739 int 01740 zfs_vnode_lock(vnode_t *vp, int flags) 01741 { 01742 int error; 01743 01744 ASSERT(vp != NULL); 01745 01746 error = vn_lock(vp, flags); 01747 return (error); 01748 } 01749 01750 static int 01751 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 01752 { 01753 zfsvfs_t *zfsvfs = vfsp->vfs_data; 01754 znode_t *rootzp; 01755 int error; 01756 01757 ZFS_ENTER_NOERROR(zfsvfs); 01758 01759 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 01760 if (error == 0) 01761 *vpp = ZTOV(rootzp); 01762 01763 ZFS_EXIT(zfsvfs); 01764 01765 if (error == 0) { 01766 error = zfs_vnode_lock(*vpp, flags); 01767 if (error == 0) 01768 (*vpp)->v_vflag |= VV_ROOT; 01769 } 01770 if (error != 0) 01771 *vpp = NULL; 01772 01773 return (error); 01774 } 01775 01782 static int 01783 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 01784 { 01785 znode_t *zp; 01786 01787 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 01788 01789 if (!unmounting) { 01790 /* 01791 * We purge the parent filesystem's vfsp as the parent 01792 * filesystem and all of its snapshots have their vnode's 01793 * v_vfsp set to the parent's filesystem's vfsp. Note, 01794 * 'z_parent' is self referential for non-snapshots. 01795 */ 01796 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 01797 #ifdef FREEBSD_NAMECACHE 01798 cache_purgevfs(zfsvfs->z_parent->z_vfs); 01799 #endif 01800 } 01801 01802 /* 01803 * Close the zil. NB: Can't close the zil while zfs_inactive 01804 * threads are blocked as zil_close can call zfs_inactive. 01805 */ 01806 if (zfsvfs->z_log) { 01807 zil_close(zfsvfs->z_log); 01808 zfsvfs->z_log = NULL; 01809 } 01810 01811 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 01812 01813 /* 01814 * If we are not unmounting (ie: online recv) and someone already 01815 * unmounted this file system while we were doing the switcheroo, 01816 * or a reopen of z_os failed then just bail out now. 01817 */ 01818 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 01819 rw_exit(&zfsvfs->z_teardown_inactive_lock); 01820 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 01821 return (EIO); 01822 } 01823 01824 /* 01825 * At this point there are no vops active, and any new vops will 01826 * fail with EIO since we have z_teardown_lock for writer (only 01827 * relavent for forced unmount). 01828 * 01829 * Release all holds on dbufs. 01830 */ 01831 mutex_enter(&zfsvfs->z_znodes_lock); 01832 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 01833 zp = list_next(&zfsvfs->z_all_znodes, zp)) 01834 if (zp->z_sa_hdl) { 01835 ASSERT(ZTOV(zp)->v_count >= 0); 01836 zfs_znode_dmu_fini(zp); 01837 } 01838 mutex_exit(&zfsvfs->z_znodes_lock); 01839 01840 /* 01841 * If we are unmounting, set the unmounted flag and let new vops 01842 * unblock. zfs_inactive will have the unmounted behavior, and all 01843 * other vops will fail with EIO. 01844 */ 01845 if (unmounting) { 01846 zfsvfs->z_unmounted = B_TRUE; 01847 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 01848 rw_exit(&zfsvfs->z_teardown_inactive_lock); 01849 01850 #ifdef __FreeBSD__ 01851 /* 01852 * Some znodes might not be fully reclaimed, wait for them. 01853 */ 01854 mutex_enter(&zfsvfs->z_znodes_lock); 01855 while (list_head(&zfsvfs->z_all_znodes) != NULL) { 01856 msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0, 01857 "zteardown", 0); 01858 } 01859 mutex_exit(&zfsvfs->z_znodes_lock); 01860 #endif 01861 } 01862 01863 /* 01864 * z_os will be NULL if there was an error in attempting to reopen 01865 * zfsvfs, so just return as the properties had already been 01866 * unregistered and cached data had been evicted before. 01867 */ 01868 if (zfsvfs->z_os == NULL) 01869 return (0); 01870 01871 /* 01872 * Unregister properties. 01873 */ 01874 zfs_unregister_callbacks(zfsvfs); 01875 01876 /* 01877 * Evict cached data 01878 */ 01879 if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) && 01880 !(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) 01881 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 01882 (void) dmu_objset_evict_dbufs(zfsvfs->z_os); 01883 01884 return (0); 01885 } 01886 01887 /*ARGSUSED*/ 01888 static int 01889 zfs_umount(vfs_t *vfsp, int fflag) 01890 { 01891 kthread_t *td = curthread; 01892 zfsvfs_t *zfsvfs = vfsp->vfs_data; 01893 objset_t *os; 01894 cred_t *cr = td->td_ucred; 01895 int ret; 01896 01897 ret = secpolicy_fs_unmount(cr, vfsp); 01898 if (ret) { 01899 if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 01900 ZFS_DELEG_PERM_MOUNT, cr)) 01901 return (ret); 01902 } 01903 01904 /* 01905 * We purge the parent filesystem's vfsp as the parent filesystem 01906 * and all of its snapshots have their vnode's v_vfsp set to the 01907 * parent's filesystem's vfsp. Note, 'z_parent' is self 01908 * referential for non-snapshots. 01909 */ 01910 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 01911 01912 /* 01913 * Unmount any snapshots mounted under .zfs before unmounting the 01914 * dataset itself. 01915 */ 01916 if (zfsvfs->z_ctldir != NULL) { 01917 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 01918 return (ret); 01919 ret = vflush(vfsp, 0, 0, td); 01920 ASSERT(ret == EBUSY); 01921 if (!(fflag & MS_FORCE)) { 01922 if (zfsvfs->z_ctldir->v_count > 1) 01923 return (EBUSY); 01924 ASSERT(zfsvfs->z_ctldir->v_count == 1); 01925 } 01926 zfsctl_destroy(zfsvfs); 01927 ASSERT(zfsvfs->z_ctldir == NULL); 01928 } 01929 01930 if (fflag & MS_FORCE) { 01931 /* 01932 * Mark file system as unmounted before calling 01933 * vflush(FORCECLOSE). This way we ensure no future vnops 01934 * will be called and risk operating on DOOMED vnodes. 01935 */ 01936 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 01937 zfsvfs->z_unmounted = B_TRUE; 01938 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 01939 } 01940 01941 /* 01942 * Flush all the files. 01943 */ 01944 ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 01945 if (ret != 0) { 01946 if (!zfsvfs->z_issnap) { 01947 zfsctl_create(zfsvfs); 01948 ASSERT(zfsvfs->z_ctldir != NULL); 01949 } 01950 return (ret); 01951 } 01952 01953 if (!(fflag & MS_FORCE)) { 01954 /* 01955 * Check the number of active vnodes in the file system. 01956 * Our count is maintained in the vfs structure, but the 01957 * number is off by 1 to indicate a hold on the vfs 01958 * structure itself. 01959 * 01960 * The '.zfs' directory maintains a reference of its 01961 * own, and any active references underneath are 01962 * reflected in the vnode count. 01963 */ 01964 if (zfsvfs->z_ctldir == NULL) { 01965 if (vfsp->vfs_count > 1) 01966 return (EBUSY); 01967 } else { 01968 if (vfsp->vfs_count > 2 || 01969 zfsvfs->z_ctldir->v_count > 1) 01970 return (EBUSY); 01971 } 01972 } 01973 01974 VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 01975 os = zfsvfs->z_os; 01976 01977 /* 01978 * z_os will be NULL if there was an error in 01979 * attempting to reopen zfsvfs. 01980 */ 01981 if (os != NULL) { 01982 /* 01983 * Unset the objset user_ptr. 01984 */ 01985 mutex_enter(&os->os_user_ptr_lock); 01986 dmu_objset_set_user(os, NULL); 01987 mutex_exit(&os->os_user_ptr_lock); 01988 01989 /* 01990 * Finally release the objset 01991 */ 01992 dmu_objset_disown(os, zfsvfs); 01993 } 01994 01995 /* 01996 * We can now safely destroy the '.zfs' directory node. 01997 */ 01998 if (zfsvfs->z_ctldir != NULL) 01999 zfsctl_destroy(zfsvfs); 02000 if (zfsvfs->z_issnap) { 02001 vnode_t *svp = vfsp->mnt_vnodecovered; 02002 02003 if (svp->v_count >= 2) 02004 VN_RELE(svp); 02005 } 02006 zfs_freevfs(vfsp); 02007 02008 return (0); 02009 } 02010 02011 static int 02012 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 02013 { 02014 zfsvfs_t *zfsvfs = vfsp->vfs_data; 02015 znode_t *zp; 02016 int err; 02017 02018 /* 02019 * zfs_zget() can't operate on virtual entries like .zfs/ or 02020 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. 02021 * This will make NFS to switch to LOOKUP instead of using VGET. 02022 */ 02023 if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR) 02024 return (EOPNOTSUPP); 02025 02026 ZFS_ENTER(zfsvfs); 02027 err = zfs_zget(zfsvfs, ino, &zp); 02028 if (err == 0 && zp->z_unlinked) { 02029 VN_RELE(ZTOV(zp)); 02030 err = EINVAL; 02031 } 02032 if (err == 0) 02033 *vpp = ZTOV(zp); 02034 ZFS_EXIT(zfsvfs); 02035 if (err == 0) 02036 err = zfs_vnode_lock(*vpp, flags); 02037 if (err != 0) 02038 *vpp = NULL; 02039 return (err); 02040 } 02041 02042 static int 02043 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 02044 struct ucred **credanonp, int *numsecflavors, int **secflavors) 02045 { 02046 zfsvfs_t *zfsvfs = vfsp->vfs_data; 02047 02048 /* 02049 * If this is regular file system vfsp is the same as 02050 * zfsvfs->z_parent->z_vfs, but if it is snapshot, 02051 * zfsvfs->z_parent->z_vfs represents parent file system 02052 * which we have to use here, because only this file system 02053 * has mnt_export configured. 02054 */ 02055 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 02056 credanonp, numsecflavors, secflavors)); 02057 } 02058 02059 CTASSERT(SHORT_FID_LEN <= sizeof(struct fid)); 02060 CTASSERT(LONG_FID_LEN <= sizeof(struct fid)); 02061 02062 static int 02063 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) 02064 { 02065 zfsvfs_t *zfsvfs = vfsp->vfs_data; 02066 znode_t *zp; 02067 uint64_t object = 0; 02068 uint64_t fid_gen = 0; 02069 uint64_t gen_mask; 02070 uint64_t zp_gen; 02071 int i, err; 02072 02073 *vpp = NULL; 02074 02075 ZFS_ENTER(zfsvfs); 02076 02077 /* 02078 * On FreeBSD we can get snapshot's mount point or its parent file 02079 * system mount point depending if snapshot is already mounted or not. 02080 */ 02081 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 02082 zfid_long_t *zlfid = (zfid_long_t *)fidp; 02083 uint64_t objsetid = 0; 02084 uint64_t setgen = 0; 02085 02086 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 02087 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 02088 02089 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 02090 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 02091 02092 ZFS_EXIT(zfsvfs); 02093 02094 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 02095 if (err) 02096 return (EINVAL); 02097 ZFS_ENTER(zfsvfs); 02098 } 02099 02100 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 02101 zfid_short_t *zfid = (zfid_short_t *)fidp; 02102 02103 for (i = 0; i < sizeof (zfid->zf_object); i++) 02104 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 02105 02106 for (i = 0; i < sizeof (zfid->zf_gen); i++) 02107 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 02108 } else { 02109 ZFS_EXIT(zfsvfs); 02110 return (EINVAL); 02111 } 02112 02113 /* A zero fid_gen means we are in the .zfs control directories */ 02114 if (fid_gen == 0 && 02115 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 02116 *vpp = zfsvfs->z_ctldir; 02117 ASSERT(*vpp != NULL); 02118 if (object == ZFSCTL_INO_SNAPDIR) { 02119 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 02120 0, NULL, NULL, NULL, NULL, NULL) == 0); 02121 } else { 02122 VN_HOLD(*vpp); 02123 } 02124 ZFS_EXIT(zfsvfs); 02125 err = zfs_vnode_lock(*vpp, flags | LK_RETRY); 02126 if (err != 0) 02127 *vpp = NULL; 02128 return (err); 02129 } 02130 02131 gen_mask = -1ULL >> (64 - 8 * i); 02132 02133 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 02134 if (err = zfs_zget(zfsvfs, object, &zp)) { 02135 ZFS_EXIT(zfsvfs); 02136 return (err); 02137 } 02138 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, 02139 sizeof (uint64_t)); 02140 zp_gen = zp_gen & gen_mask; 02141 if (zp_gen == 0) 02142 zp_gen = 1; 02143 if (zp->z_unlinked || zp_gen != fid_gen) { 02144 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 02145 VN_RELE(ZTOV(zp)); 02146 ZFS_EXIT(zfsvfs); 02147 return (EINVAL); 02148 } 02149 02150 *vpp = ZTOV(zp); 02151 ZFS_EXIT(zfsvfs); 02152 err = zfs_vnode_lock(*vpp, flags | LK_RETRY); 02153 if (err == 0) 02154 vnode_create_vobject(*vpp, zp->z_size, curthread); 02155 else 02156 *vpp = NULL; 02157 return (err); 02158 } 02159 02166 int 02167 zfs_suspend_fs(zfsvfs_t *zfsvfs) 02168 { 02169 int error; 02170 02171 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 02172 return (error); 02173 dmu_objset_disown(zfsvfs->z_os, zfsvfs); 02174 02175 return (0); 02176 } 02177 02181 int 02182 zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) 02183 { 02184 int err; 02185 02186 ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 02187 ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 02188 02189 err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs, 02190 &zfsvfs->z_os); 02191 if (err) { 02192 zfsvfs->z_os = NULL; 02193 } else { 02194 znode_t *zp; 02195 uint64_t sa_obj = 0; 02196 02197 /* 02198 * Make sure version hasn't changed 02199 */ 02200 02201 err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION, 02202 &zfsvfs->z_version); 02203 02204 if (err) 02205 goto bail; 02206 02207 err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ, 02208 ZFS_SA_ATTRS, 8, 1, &sa_obj); 02209 02210 if (err && zfsvfs->z_version >= ZPL_VERSION_SA) 02211 goto bail; 02212 02213 if ((err = sa_setup(zfsvfs->z_os, sa_obj, 02214 zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0) 02215 goto bail; 02216 02217 if (zfsvfs->z_version >= ZPL_VERSION_SA) 02218 sa_register_update_callback(zfsvfs->z_os, 02219 zfs_sa_upgrade); 02220 02221 VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 02222 02223 zfs_set_fuid_feature(zfsvfs); 02224 02225 /* 02226 * Attempt to re-establish all the active znodes with 02227 * their dbufs. If a zfs_rezget() fails, then we'll let 02228 * any potential callers discover that via ZFS_ENTER_VERIFY_VP 02229 * when they try to use their znode. 02230 */ 02231 mutex_enter(&zfsvfs->z_znodes_lock); 02232 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 02233 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 02234 (void) zfs_rezget(zp); 02235 } 02236 mutex_exit(&zfsvfs->z_znodes_lock); 02237 } 02238 02239 bail: 02240 /* release the VOPs */ 02241 rw_exit(&zfsvfs->z_teardown_inactive_lock); 02242 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 02243 02244 if (err) { 02245 /* 02246 * Since we couldn't reopen zfsvfs::z_os, or 02247 * setup the sa framework force unmount this file system. 02248 */ 02249 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 02250 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 02251 } 02252 return (err); 02253 } 02254 02255 static void 02256 zfs_freevfs(vfs_t *vfsp) 02257 { 02258 zfsvfs_t *zfsvfs = vfsp->vfs_data; 02259 02260 #ifdef sun 02261 /* 02262 * If this is a snapshot, we have an extra VFS_HOLD on our parent 02263 * from zfs_mount(). Release it here. If we came through 02264 * zfs_mountroot() instead, we didn't grab an extra hold, so 02265 * skip the VFS_RELE for rootvfs. 02266 */ 02267 if (zfsvfs->z_issnap && (vfsp != rootvfs)) 02268 VFS_RELE(zfsvfs->z_parent->z_vfs); 02269 #endif /* sun */ 02270 02271 zfsvfs_free(zfsvfs); 02272 02273 atomic_add_32(&zfs_active_fs_count, -1); 02274 } 02275 02276 #ifdef __i386__ 02277 static int desiredvnodes_backup; 02278 #endif 02279 02280 static void 02281 zfs_vnodes_adjust(void) 02282 { 02283 #ifdef __i386__ 02284 int newdesiredvnodes; 02285 02286 desiredvnodes_backup = desiredvnodes; 02287 02288 /* 02289 * We calculate newdesiredvnodes the same way it is done in 02290 * vntblinit(). If it is equal to desiredvnodes, it means that 02291 * it wasn't tuned by the administrator and we can tune it down. 02292 */ 02293 newdesiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 * 02294 vm_kmem_size / (5 * (sizeof(struct vm_object) + 02295 sizeof(struct vnode)))); 02296 if (newdesiredvnodes == desiredvnodes) 02297 desiredvnodes = (3 * newdesiredvnodes) / 4; 02298 #endif 02299 } 02300 02301 static void 02302 zfs_vnodes_adjust_back(void) 02303 { 02304 02305 #ifdef __i386__ 02306 desiredvnodes = desiredvnodes_backup; 02307 #endif 02308 } 02309 02310 void 02311 zfs_init(void) 02312 { 02313 02314 printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n"); 02315 02316 /* 02317 * Initialize .zfs directory structures 02318 */ 02319 zfsctl_init(); 02320 02321 /* 02322 * Initialize znode cache, vnode ops, etc... 02323 */ 02324 zfs_znode_init(); 02325 02326 /* 02327 * Reduce number of vnodes. Originally number of vnodes is calculated 02328 * with UFS inode in mind. We reduce it here, because it's too big for 02329 * ZFS/i386. 02330 */ 02331 zfs_vnodes_adjust(); 02332 02333 dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); 02334 } 02335 02336 void 02337 zfs_fini(void) 02338 { 02339 zfsctl_fini(); 02340 zfs_znode_fini(); 02341 zfs_vnodes_adjust_back(); 02342 } 02343 02344 int 02345 zfs_busy(void) 02346 { 02347 return (zfs_active_fs_count != 0); 02348 } 02349 02350 int 02351 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 02352 { 02353 int error; 02354 objset_t *os = zfsvfs->z_os; 02355 dmu_tx_t *tx; 02356 02357 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 02358 return (EINVAL); 02359 02360 if (newvers < zfsvfs->z_version) 02361 return (EINVAL); 02362 02363 if (zfs_spa_version_map(newvers) > 02364 spa_version(dmu_objset_spa(zfsvfs->z_os))) 02365 return (ENOTSUP); 02366 02367 tx = dmu_tx_create(os); 02368 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 02369 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 02370 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 02371 ZFS_SA_ATTRS); 02372 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 02373 } 02374 error = dmu_tx_assign(tx, TXG_WAIT); 02375 if (error) { 02376 dmu_tx_abort(tx); 02377 return (error); 02378 } 02379 02380 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 02381 8, 1, &newvers, tx); 02382 02383 if (error) { 02384 dmu_tx_commit(tx); 02385 return (error); 02386 } 02387 02388 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 02389 uint64_t sa_obj; 02390 02391 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, 02392 SPA_VERSION_SA); 02393 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 02394 DMU_OT_NONE, 0, tx); 02395 02396 error = zap_add(os, MASTER_NODE_OBJ, 02397 ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 02398 ASSERT0(error); 02399 02400 VERIFY(0 == sa_set_sa_object(os, sa_obj)); 02401 sa_register_update_callback(os, zfs_sa_upgrade); 02402 } 02403 02404 spa_history_log_internal(LOG_DS_UPGRADE, 02405 dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu", 02406 zfsvfs->z_version, newvers, dmu_objset_id(os)); 02407 02408 dmu_tx_commit(tx); 02409 02410 zfsvfs->z_version = newvers; 02411 02412 zfs_set_fuid_feature(zfsvfs); 02413 02414 return (0); 02415 } 02416 02420 int 02421 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 02422 { 02423 const char *pname; 02424 int error = ENOENT; 02425 02426 /* 02427 * Look up the file system's value for the property. For the 02428 * version property, we look up a slightly different string. 02429 */ 02430 if (prop == ZFS_PROP_VERSION) 02431 pname = ZPL_VERSION_STR; 02432 else 02433 pname = zfs_prop_to_name(prop); 02434 02435 if (os != NULL) 02436 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 02437 02438 if (error == ENOENT) { 02439 /* No value set, use the default value */ 02440 switch (prop) { 02441 case ZFS_PROP_VERSION: 02442 *value = ZPL_VERSION; 02443 break; 02444 case ZFS_PROP_NORMALIZE: 02445 case ZFS_PROP_UTF8ONLY: 02446 *value = 0; 02447 break; 02448 case ZFS_PROP_CASE: 02449 *value = ZFS_CASE_SENSITIVE; 02450 break; 02451 default: 02452 return (error); 02453 } 02454 error = 0; 02455 } 02456 return (error); 02457 } 02458 02459 #ifdef _KERNEL 02460 void 02461 zfsvfs_update_fromname(const char *oldname, const char *newname) 02462 { 02463 char tmpbuf[MAXPATHLEN]; 02464 struct mount *mp; 02465 char *fromname; 02466 size_t oldlen; 02467 02468 oldlen = strlen(oldname); 02469 02470 mtx_lock(&mountlist_mtx); 02471 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 02472 fromname = mp->mnt_stat.f_mntfromname; 02473 if (strcmp(fromname, oldname) == 0) { 02474 (void)strlcpy(fromname, newname, 02475 sizeof(mp->mnt_stat.f_mntfromname)); 02476 continue; 02477 } 02478 if (strncmp(fromname, oldname, oldlen) == 0 && 02479 (fromname[oldlen] == '/' || fromname[oldlen] == '@')) { 02480 (void)snprintf(tmpbuf, sizeof(tmpbuf), "%s%s", 02481 newname, fromname + oldlen); 02482 (void)strlcpy(fromname, tmpbuf, 02483 sizeof(mp->mnt_stat.f_mntfromname)); 02484 continue; 02485 } 02486 } 02487 mtx_unlock(&mountlist_mtx); 02488 } 02489 #endif