Index: sys/cddl/contrib/opensolaris =================================================================== --- sys/cddl/contrib/opensolaris (revision 247598) +++ sys/cddl/contrib/opensolaris (working copy) Property changes on: sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo Merged /vendor-sys/illumos/dist:r247578 Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (revision 247598) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (working copy) @@ -83,23 +83,25 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, check_hostid, CTLFL "Check hostid on import?"); typedef enum zti_modes { - zti_mode_fixed, /* value is # of threads (min 1) */ - zti_mode_online_percent, /* value is % of online CPUs */ - zti_mode_batch, /* cpu-intensive; value is ignored */ - zti_mode_null, /* don't create a taskq */ - zti_nmodes + ZTI_MODE_FIXED, /* value is # of threads (min 1) */ + ZTI_MODE_ONLINE_PERCENT, /* value is % of online CPUs */ + ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ + ZTI_MODE_NULL, /* don't create a taskq */ + ZTI_NMODES } zti_modes_t; -#define ZTI_FIX(n) { zti_mode_fixed, (n) } -#define ZTI_PCT(n) { zti_mode_online_percent, (n) } -#define ZTI_BATCH { zti_mode_batch, 0 } -#define ZTI_NULL { zti_mode_null, 0 } +#define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) } +#define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 } +#define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 } +#define ZTI_NULL { ZTI_MODE_NULL, 0, 0 } -#define ZTI_ONE ZTI_FIX(1) +#define ZTI_N(n) ZTI_P(n, 1) +#define ZTI_ONE ZTI_N(1) typedef struct zio_taskq_info { - enum zti_modes zti_mode; + zti_modes_t zti_mode; uint_t zti_value; + uint_t zti_count; } zio_taskq_info_t; static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { @@ -107,17 +109,30 @@ static const char *const zio_taskq_types[ZIO_TASKQ }; /* - * Define the taskq threads for the following I/O types: - * NULL, READ, WRITE, FREE, CLAIM, and IOCTL + * This table defines the taskq settings for each ZFS I/O type. When + * initializing a pool, we use this table to create an appropriately sized + * taskq. Some operations are low volume and therefore have a small, static + * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE + * macros. Other operations process a large amount of data; the ZTI_BATCH + * macro causes us to create a taskq oriented for throughput. Some operations + * are so high frequency and short-lived that the taskq itself can become a a + * point of lock contention. The ZTI_P(#, #) macro indicates that we need an + * additional degree of parallelism specified by the number of threads per- + * taskq and the number of taskqs; when dispatching an event in this case, the + * particular taskq is chosen at random. + * + * The different taskq priorities are to handle the different contexts (issue + * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that + * need to be handled with minimum delay. */ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ - { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, - { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, - { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) }, - { ZTI_FIX(100), ZTI_NULL, ZTI_ONE, ZTI_NULL }, - { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, - { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, + { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ + { ZTI_N(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, /* READ */ + { ZTI_BATCH, ZTI_N(5), ZTI_N(8), ZTI_N(5) }, /* WRITE */ + { ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ + { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ + { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ }; static dsl_syncfunc_t spa_sync_version; @@ -817,50 +832,136 @@ spa_get_errlists(spa_t *spa, avl_tree_t *last, avl offsetof(spa_error_entry_t, se_avl)); } -static taskq_t * -spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode, - uint_t value) +static void +spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) { + const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; + enum zti_modes mode = ztip->zti_mode; + uint_t value = ztip->zti_value; + uint_t count = ztip->zti_count; + spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; + char name[32]; uint_t flags = TASKQ_PREPOPULATE; boolean_t batch = B_FALSE; - switch (mode) { - case zti_mode_null: - return (NULL); /* no taskq needed */ + if (mode == ZTI_MODE_NULL) { + tqs->stqs_count = 0; + tqs->stqs_taskq = NULL; + return; + } - case zti_mode_fixed: - ASSERT3U(value, >=, 1); - value = MAX(value, 1); - break; + ASSERT3U(count, >, 0); - case zti_mode_batch: - batch = B_TRUE; - flags |= TASKQ_THREADS_CPU_PCT; - value = zio_taskq_batch_pct; - break; + tqs->stqs_count = count; + tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); - case zti_mode_online_percent: - flags |= TASKQ_THREADS_CPU_PCT; - break; + for (uint_t i = 0; i < count; i++) { + taskq_t *tq; - default: - panic("unrecognized mode for %s taskq (%u:%u) in " - "spa_activate()", - name, mode, value); - break; - } + switch (mode) { + case ZTI_MODE_FIXED: + ASSERT3U(value, >=, 1); + value = MAX(value, 1); + break; + case ZTI_MODE_BATCH: + batch = B_TRUE; + flags |= TASKQ_THREADS_CPU_PCT; + value = zio_taskq_batch_pct; + break; + + case ZTI_MODE_ONLINE_PERCENT: + flags |= TASKQ_THREADS_CPU_PCT; + break; + + default: + panic("unrecognized mode for %s_%s taskq (%u:%u) in " + "spa_activate()", + zio_type_name[t], zio_taskq_types[q], mode, value); + break; + } + + if (count > 1) { + (void) snprintf(name, sizeof (name), "%s_%s_%u", + zio_type_name[t], zio_taskq_types[q], i); + } else { + (void) snprintf(name, sizeof (name), "%s_%s", + zio_type_name[t], zio_taskq_types[q]); + } + #ifdef SYSDC - if (zio_taskq_sysdc && spa->spa_proc != &p0) { - if (batch) - flags |= TASKQ_DC_BATCH; + if (zio_taskq_sysdc && spa->spa_proc != &p0) { + if (batch) + flags |= TASKQ_DC_BATCH; - return (taskq_create_sysdc(name, value, 50, INT_MAX, - spa->spa_proc, zio_taskq_basedc, flags)); + tq = taskq_create_sysdc(name, value, 50, INT_MAX, + spa->spa_proc, zio_taskq_basedc, flags); + } else { +#endif + tq = taskq_create_proc(name, value, maxclsyspri, 50, + INT_MAX, spa->spa_proc, flags); +#ifdef SYSDC + } +#endif + + tqs->stqs_taskq[i] = tq; } +} + +static void +spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q) +{ + spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; + + if (tqs->stqs_taskq == NULL) { + ASSERT0(tqs->stqs_count); + return; + } + + for (uint_t i = 0; i < tqs->stqs_count; i++) { + ASSERT3P(tqs->stqs_taskq[i], !=, NULL); + taskq_destroy(tqs->stqs_taskq[i]); + } + + kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *)); + tqs->stqs_taskq = NULL; +} + +/* + * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority. + * Note that a type may have multiple discrete taskqs to avoid lock contention + * on the taskq itself. In that case we choose which taskq at random by using + * the low bits of gethrtime(). + */ +void +spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, +#ifdef illumos + task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent) +#else /* !illumos */ + task_func_t *func, void *arg, uint_t flags, void *task) +#endif /* illumos */ +{ + spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; + taskq_t *tq; + + ASSERT3P(tqs->stqs_taskq, !=, NULL); + ASSERT3U(tqs->stqs_count, !=, 0); + + if (tqs->stqs_count == 1) { + tq = tqs->stqs_taskq[0]; + } else { + tq = tqs->stqs_taskq[gethrtime() % tqs->stqs_count]; + } + +#ifdef illumos + taskq_dispatch_ent(tq, func, arg, flags, ent); +#else +#ifdef _KERNEL + (void) taskq_dispatch_safe(tq, func, arg, flags, (struct ostask *)task); +#else + (void) taskq_dispatch(tq, func, arg, flags); #endif - return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX, - spa->spa_proc, flags)); +#endif /* !illumos */ } static void @@ -868,16 +969,7 @@ spa_create_zio_taskqs(spa_t *spa) { for (int t = 0; t < ZIO_TYPES; t++) { for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { - const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; - enum zti_modes mode = ztip->zti_mode; - uint_t value = ztip->zti_value; - char name[32]; - - (void) snprintf(name, sizeof (name), - "%s_%s", zio_type_name[t], zio_taskq_types[q]); - - spa->spa_zio_taskq[t][q] = - spa_taskq_create(spa, name, mode, value); + spa_taskqs_init(spa, t, q); } } } @@ -1054,9 +1146,7 @@ spa_deactivate(spa_t *spa) for (int t = 0; t < ZIO_TYPES; t++) { for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { - if (spa->spa_zio_taskq[t][q] != NULL) - taskq_destroy(spa->spa_zio_taskq[t][q]); - spa->spa_zio_taskq[t][q] = NULL; + spa_taskqs_fini(spa, t, q); } } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h (revision 247598) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h (working copy) @@ -81,16 +81,16 @@ typedef struct spa_config_dirent { char *scd_path; } spa_config_dirent_t; -enum zio_taskq_type { +typedef enum zio_taskq_type { ZIO_TASKQ_ISSUE = 0, ZIO_TASKQ_ISSUE_HIGH, ZIO_TASKQ_INTERRUPT, ZIO_TASKQ_INTERRUPT_HIGH, ZIO_TASKQ_TYPES -}; +} zio_taskq_type_t; /* - * State machine for the zpool-pooname process. The states transitions + * State machine for the zpool-poolname process. The states transitions * are done as follows: * * From To Routine @@ -108,6 +108,11 @@ typedef enum spa_proc_state { SPA_PROC_GONE /* spa_thread() is exiting, spa_proc = &p0 */ } spa_proc_state_t; +typedef struct spa_taskqs { + uint_t stqs_count; + taskq_t **stqs_taskq; +} spa_taskqs_t; + struct spa { /* * Fields protected by spa_namespace_lock. @@ -126,7 +131,7 @@ struct spa { uint8_t spa_sync_on; /* sync threads are running */ spa_load_state_t spa_load_state; /* current load operation */ uint64_t spa_import_flags; /* import specific flags */ - taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES]; + spa_taskqs_t spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES]; dsl_pool_t *spa_dsl_pool; boolean_t spa_is_initializing; /* true while opening pool */ metaslab_class_t *spa_normal_class; /* normal data class */ @@ -256,6 +261,14 @@ struct spa { extern const char *spa_config_path; +#ifdef illumos +extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, + task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent); +#else +extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, + task_func_t *func, void *arg, uint_t flags, void *task); +#endif + #ifdef __cplusplus } #endif Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h (revision 247598) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h (working copy) @@ -472,10 +472,13 @@ struct zio { zio_cksum_report_t *io_cksum_report; uint64_t io_ena; + /* FreeBSD only. */ #ifdef _KERNEL - /* FreeBSD only. */ - struct ostask io_task; + struct ostask io_tqent; +#else + void *io_tqent; #endif + avl_node_t io_trim_node; list_node_t io_trim_link; }; Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c (revision 247598) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c (working copy) @@ -187,6 +187,11 @@ vdev_file_io_start(zio_t *zio) zio_interrupt(zio); +#ifdef illumos + spa_taskq_dispatch_ent(spa, ZIO_TYPE_FREE, ZIO_TASKQ_ISSUE, + vdev_file_io_strategy, bp, 0, &zio->io_tqent); +#endif + return (ZIO_PIPELINE_STOP); } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c (revision 247598) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c (working copy) @@ -1181,7 +1181,7 @@ zio_free_bp_init(zio_t *zio) */ static void -zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline) +zio_taskq_dispatch(zio_t *zio, zio_taskq_type_t q, boolean_t cutinline) { spa_t *spa = zio->io_spa; zio_type_t t = zio->io_type; @@ -1204,31 +1204,40 @@ static void t = ZIO_TYPE_NULL; /* - * If this is a high priority I/O, then use the high priority taskq. + * If this is a high priority I/O, then use the high priority taskq if + * available. */ if (zio->io_priority == ZIO_PRIORITY_NOW && - spa->spa_zio_taskq[t][q + 1] != NULL) + spa->spa_zio_taskq[t][q + 1].stqs_count != 0) q++; ASSERT3U(q, <, ZIO_TASKQ_TYPES); -#ifdef _KERNEL - (void) taskq_dispatch_safe(spa->spa_zio_taskq[t][q], - (task_func_t *)zio_execute, zio, flags, &zio->io_task); -#else - (void) taskq_dispatch(spa->spa_zio_taskq[t][q], - (task_func_t *)zio_execute, zio, flags); +#ifdef illumos + /* + * NB: We are assuming that the zio can only be dispatched + * to a single taskq at a time. It would be a grievous error + * to dispatch the zio to another taskq at the same time. + */ + ASSERT(zio->io_tqent.tqent_next == NULL); #endif + spa_taskq_dispatch_ent(spa, t, q, (task_func_t *)zio_execute, zio, + flags, &zio->io_tqent); } static boolean_t -zio_taskq_member(zio_t *zio, enum zio_taskq_type q) +zio_taskq_member(zio_t *zio, zio_taskq_type_t q) { kthread_t *executor = zio->io_executor; spa_t *spa = zio->io_spa; - for (zio_type_t t = 0; t < ZIO_TYPES; t++) - if (taskq_member(spa->spa_zio_taskq[t][q], executor)) - return (B_TRUE); + for (zio_type_t t = 0; t < ZIO_TYPES; t++) { + spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; + uint_t i; + for (i = 0; i < tqs->stqs_count; i++) { + if (taskq_member(tqs->stqs_taskq[i], executor)) + return (B_TRUE); + } + } return (B_FALSE); } @@ -3126,16 +3135,16 @@ zio_done(zio_t *zio) * Reexecution is potentially a huge amount of work. * Hand it off to the otherwise-unused claim taskq. */ -#ifdef _KERNEL - (void) taskq_dispatch_safe( - spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], - (task_func_t *)zio_reexecute, zio, TQ_SLEEP, - &zio->io_task); +#ifdef illumos + ASSERT(zio->io_tqent.tqent_next == NULL); + spa_taskq_dispatch_ent(spa, ZIO_TYPE_CLAIM, + ZIO_TASKQ_ISSUE, (task_func_t *)zio_reexecute, zio, + 0, &zio->io_tqent); #else - (void) taskq_dispatch( - spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], - (task_func_t *)zio_reexecute, zio, TQ_SLEEP); -#endif + spa_taskq_dispatch_ent(spa, ZIO_TYPE_CLAIM, + ZIO_TASKQ_ISSUE, (task_func_t *)zio_reexecute, zio, + TQ_SLEEP, &zio->io_tqent); +#endif /* !illumos */ } return (ZIO_PIPELINE_STOP); }