Skip to content

Commit 53bd6c8

Browse files
MigeljanImeri authored and behlendorf committed
Add vdev property to disable vdev io scheduler
Added a vdev property to disable the vdev I/O scheduler. The intention behind this property is to improve IOPS performance when using O_DIRECT. Signed-off-by: MigeljanImeri <[email protected]>
1 parent 10a78e2 commit 53bd6c8

File tree

14 files changed

+212
-2
lines changed

14 files changed

+212
-2
lines changed

include/sys/fs/zfs.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,9 +385,23 @@ typedef enum {
385385
VDEV_PROP_TRIM_SUPPORT,
386386
VDEV_PROP_TRIM_ERRORS,
387387
VDEV_PROP_SLOW_IOS,
388+
VDEV_PROP_SCHEDULER,
388389
VDEV_NUM_PROPS
389390
} vdev_prop_t;
390391

392+
/*
393+
* Different scheduling behaviors for the vdev property "scheduler".
394+
* VDEV_SCHEDULER_AUTO = Don't queue if vdev is nonrot and backed by blkdev,
395+
* queue otherwise.
396+
* VDEV_SCHEDULER_CLASSIC = Always queue.
397+
* VDEV_SCHEDULER_NONE = Never queue.
398+
*/
399+
/*
 * Values of the vdev index property registered as "scheduler"
 * (VDEV_PROP_SCHEDULER); VDEV_SCHEDULER_AUTO is the registered default.
 * Regardless of the value, zios flagged ZIO_FLAG_NODATA are always queued.
 */
typedef enum {
400+
VDEV_SCHEDULER_AUTO,	/* "auto": skip the queue only if nonrot and blkdev */
401+
VDEV_SCHEDULER_CLASSIC,	/* "classic": always use the vdev queue */
402+
VDEV_SCHEDULER_NONE	/* "none": skip the vdev queue */
403+
} vdev_scheduler_type_t;
404+
391405
/*
392406
* Dataset property functions shared between libzfs and kernel.
393407
*/

include/sys/vdev_impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,7 @@ struct vdev {
423423
boolean_t vdev_resilver_deferred; /* resilver deferred */
424424
boolean_t vdev_kobj_flag; /* kobj event record */
425425
boolean_t vdev_attaching; /* vdev attach ashift handling */
426+
boolean_t vdev_is_blkdev; /* vdev is backed by block device */
426427
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
427428
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
428429
zio_t *vdev_probe_zio; /* root of current probe */
@@ -466,6 +467,7 @@ struct vdev {
466467
uint64_t vdev_io_t;
467468
uint64_t vdev_slow_io_n;
468469
uint64_t vdev_slow_io_t;
470+
uint64_t vdev_scheduler; /* control how I/Os are submitted */
469471
};
470472

471473
#define VDEV_PAD_SIZE (8 << 10)

lib/libzfs/libzfs.abi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6116,7 +6116,8 @@
61166116
<enumerator name='VDEV_PROP_TRIM_SUPPORT' value='49'/>
61176117
<enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/>
61186118
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
6119-
<enumerator name='VDEV_NUM_PROPS' value='52'/>
6119+
<enumerator name='VDEV_PROP_SCHEDULER' value='52'/>
6120+
<enumerator name='VDEV_NUM_PROPS' value='53'/>
61206121
</enum-decl>
61216122
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
61226123
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>

man/man7/vdevprops.7

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,22 @@ If this device should perform new allocations, used to disable a device
157157
when it is scheduled for later removal.
158158
See
159159
.Xr zpool-remove 8 .
160+
.It Sy scheduler Ns = Ns Sy auto Ns | Ns Sy classic Ns | Ns Sy none
161+
Controls how I/O requests are added to the vdev queue when reading or
162+
writing to this vdev.
163+
.It Sy auto
164+
Does not add I/O requests to the vdev queue if the vdev is backed by a
165+
non-rotational block device.
166+
This can sometimes improve performance for direct I/O.
167+
I/O requests will be added to the vdev queue if the vdev is backed by a
168+
rotational block device or file.
169+
This is the default behavior.
170+
.It Sy classic
171+
Always adds I/O requests to the vdev queue.
172+
.It Sy none
173+
Never adds I/O requests to the vdev queue.
174+
This is not recommended for vdevs backed by spinning disks as it could
175+
result in starvation.
160176
.El
161177
.Ss User Properties
162178
In addition to the standard native properties, ZFS supports arbitrary user

module/os/freebsd/zfs/vdev_geom.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,9 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
968968
else
969969
vd->vdev_nonrot = B_FALSE;
970970

971+
/* Is backed by a block device. */
972+
vd->vdev_is_blkdev = B_TRUE;
973+
971974
/* Set when device reports it supports TRIM. */
972975
error = g_getattr("GEOM::candelete", cp, &has_trim);
973976
vd->vdev_has_trim = (error == 0 && has_trim);

module/os/linux/zfs/vdev_disk.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,9 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
447447
/* Inform the ZIO pipeline that we are non-rotational */
448448
v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(bdev));
449449

450+
/* Is backed by a block device. */
451+
v->vdev_is_blkdev = B_TRUE;
452+
450453
/* Physical volume size in bytes for the partition */
451454
*psize = bdev_capacity(bdev);
452455

module/zcommon/zpool_prop.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,13 @@ vdev_prop_init(void)
326326
{ NULL }
327327
};
328328

329+
static const zprop_index_t vdevschedulertype_table[] = {
330+
{ "auto", VDEV_SCHEDULER_AUTO },
331+
{ "classic", VDEV_SCHEDULER_CLASSIC },
332+
{ "none", VDEV_SCHEDULER_NONE },
333+
{ NULL }
334+
};
335+
329336
struct zfs_mod_supported_features *sfeatures =
330337
zfs_mod_list_supported(ZFS_SYSFS_VDEV_PROPERTIES);
331338

@@ -470,6 +477,10 @@ vdev_prop_init(void)
470477
zprop_register_index(VDEV_PROP_TRIM_SUPPORT, "trim_support", 0,
471478
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "TRIMSUP",
472479
boolean_table, sfeatures);
480+
zprop_register_index(VDEV_PROP_SCHEDULER, "scheduler",
481+
VDEV_SCHEDULER_AUTO, PROP_DEFAULT, ZFS_TYPE_VDEV,
482+
"auto | classic | none", "IO_SCHEDULER", vdevschedulertype_table,
483+
sfeatures);
473484

474485
/* default index properties */
475486
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,

module/zfs/vdev.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
722722
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
723723
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
724724

725+
vd->vdev_scheduler = vdev_prop_default_numeric(VDEV_PROP_SCHEDULER);
726+
725727
list_link_init(&vd->vdev_config_dirty_node);
726728
list_link_init(&vd->vdev_state_dirty_node);
727729
list_link_init(&vd->vdev_initialize_node);
@@ -3890,6 +3892,12 @@ vdev_load(vdev_t *vd)
38903892
if (error && error != ENOENT)
38913893
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
38923894
"failed [error=%d]", (u_longlong_t)zapobj, error);
3895+
3896+
error = vdev_prop_get_int(vd, VDEV_PROP_SCHEDULER,
3897+
&vd->vdev_scheduler);
3898+
if (error && error != ENOENT)
3899+
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
3900+
"failed [error=%d]", (u_longlong_t)zapobj, error);
38933901
}
38943902

38953903
/*
@@ -6125,6 +6133,15 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
61256133
}
61266134
vd->vdev_slow_io_t = intval;
61276135
break;
6136+
case VDEV_PROP_SCHEDULER:
6137+
if (nvpair_value_uint64(elem, &intval) != 0) {
6138+
error = EINVAL;
6139+
break;
6140+
}
6141+
if (vd->vdev_ops->vdev_op_leaf) {
6142+
vd->vdev_scheduler = intval;
6143+
}
6144+
break;
61286145
default:
61296146
/* Most processing is done in vdev_props_set_sync */
61306147
break;
@@ -6488,6 +6505,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
64886505
case VDEV_PROP_IO_T:
64896506
case VDEV_PROP_SLOW_IO_N:
64906507
case VDEV_PROP_SLOW_IO_T:
6508+
case VDEV_PROP_SCHEDULER:
64916509
err = vdev_prop_get_int(vd, prop, &intval);
64926510
if (err && err != ENOENT)
64936511
break;

module/zfs/vdev_file.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
109109
*/
110110
vd->vdev_nonrot = B_TRUE;
111111

112+
/* Is not backed by a block device. */
113+
vd->vdev_is_blkdev = B_FALSE;
114+
112115
/*
113116
* Allow TRIM on file based vdevs. This may not always be supported,
114117
* since it depends on your kernel version and underlying filesystem

module/zfs/vdev_queue.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,38 @@ vdev_queue_io_to_issue(vdev_queue_t *vq)
879879
return (zio);
880880
}
881881

882+
static boolean_t
883+
vdev_should_queue_zio(zio_t *zio)
884+
{
885+
vdev_t *vd = zio->io_vd;
886+
boolean_t should_queue = B_TRUE;
887+
888+
/*
889+
* Add zio with ZIO_FLAG_NODATA to queue as bypass code
890+
* currently does not handle certain cases (gang abd, raidz
891+
* write aggregation).
892+
*/
893+
if (zio->io_flags & ZIO_FLAG_NODATA)
894+
return (B_TRUE);
895+
896+
switch (vd->vdev_scheduler) {
897+
case VDEV_SCHEDULER_AUTO:
898+
if (vd->vdev_nonrot && vd->vdev_is_blkdev)
899+
should_queue = B_FALSE;
900+
break;
901+
case VDEV_SCHEDULER_CLASSIC:
902+
should_queue = B_TRUE;
903+
break;
904+
case VDEV_SCHEDULER_NONE:
905+
should_queue = B_FALSE;
906+
break;
907+
default:
908+
should_queue = B_TRUE;
909+
break;
910+
}
911+
return (should_queue);
912+
}
913+
882914
zio_t *
883915
vdev_queue_io(zio_t *zio)
884916
{
@@ -922,6 +954,11 @@ vdev_queue_io(zio_t *zio)
922954
zio->io_flags |= ZIO_FLAG_DONT_QUEUE;
923955
zio->io_timestamp = gethrtime();
924956

957+
if (!vdev_should_queue_zio(zio)) {
958+
zio->io_queue_state = ZIO_QS_NONE;
959+
return (zio);
960+
}
961+
925962
mutex_enter(&vq->vq_lock);
926963
vdev_queue_io_add(vq, zio);
927964
nio = vdev_queue_io_to_issue(vq);
@@ -954,6 +991,10 @@ vdev_queue_io_done(zio_t *zio)
954991
vq->vq_io_complete_ts = now;
955992
vq->vq_io_delta_ts = zio->io_delta = now - zio->io_timestamp;
956993

994+
if (zio->io_queue_state == ZIO_QS_NONE) {
995+
return;
996+
}
997+
957998
mutex_enter(&vq->vq_lock);
958999
vdev_queue_pending_remove(vq, zio);
9591000

0 commit comments

Comments
 (0)