Skip to content

Commit cf14646

Browse files
authored
Default to zfs_bclone_wait_dirty=1
Update the default FICLONE and FICLONERANGE ioctl behavior to wait on dirty blocks. While this does remove some control from the application, in practice ZFS is better positioned to do the optimal thing and immediately force a TXG sync. Reviewed-by: Rob Norris <[email protected]> Reviewed-by: Alexander Motin <[email protected]> Reviewed-by: George Melikov <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #17455
1 parent 4bd7a2e commit cf14646

File tree

4 files changed

+39
-14
lines changed

4 files changed

+39
-14
lines changed

man/man4/zfs.4

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1399,14 +1399,15 @@ If this setting is 0, then even if feature@block_cloning is enabled,
13991399
using functions and system calls that attempt to clone blocks will act as
14001400
though the feature is disabled.
14011401
.
1402-
.It Sy zfs_bclone_wait_dirty Ns = Ns Sy 0 Ns | Ns 1 Pq int
1403-
When set to 1 the FICLONE and FICLONERANGE ioctls wait for dirty data to be
1404-
written to disk.
1405-
This allows the clone operation to reliably succeed when a file is
1402+
.It Sy zfs_bclone_wait_dirty Ns = Ns Sy 1 Ns | Ns 0 Pq int
1403+
When set to 1 the FICLONE and FICLONERANGE ioctls will wait for any dirty
1404+
data to be written to disk before proceeding.
1405+
This ensures that the clone operation reliably succeeds, even if a file is
14061406
modified and then immediately cloned.
1407-
For small files this may be slower than making a copy of the file.
1408-
Therefore, this setting defaults to 0 which causes a clone operation to
1409-
immediately fail when encountering a dirty block.
1407+
Note that for small files this may be slower than simply copying the file.
1408+
When set to 0 the clone operation will immediately fail if it encounters
1409+
any dirty blocks.
1410+
By default waiting is enabled.
14101411
.
14111412
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
14121413
Select a BLAKE3 implementation.

module/zfs/zfs_vnops.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,14 @@
6767
int zfs_bclone_enabled = 1;
6868

6969
/*
70-
* When set zfs_clone_range() waits for dirty data to be written to disk.
71-
* This allows the clone operation to reliably succeed when a file is modified
72-
* and then immediately cloned. For small files this may be slower than making
73-
* a copy of the file and is therefore not the default. However, in certain
74-
* scenarios this behavior may be desirable so a tunable is provided.
70+
* When set to 1 the FICLONE and FICLONERANGE ioctls will wait for any dirty
71+
* data to be written to disk before proceeding. This ensures that the clone
72+
* operation reliably succeeds, even if a file is modified and then immediately
73+
* cloned. Note that for small files this may be slower than simply copying
74+
* the file. When set to 0 the clone operation will immediately fail if it
75+
* encounters any dirty blocks. By default waiting is enabled.
7576
*/
76-
int zfs_bclone_wait_dirty = 0;
77+
int zfs_bclone_wait_dirty = 1;
7778

7879
/*
7980
* Enable Direct I/O. If this setting is 0, then all I/O requests will be

tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,22 @@ function cleanup
4141
{
4242
datasetexists $TESTPOOL && destroy_pool $TESTPOOL
4343
set_tunable64 TXG_TIMEOUT $timeout
44+
log_must restore_tunable BCLONE_WAIT_DIRTY
4445
}
4546

4647
log_onexit cleanup
4748

49+
log_must save_tunable BCLONE_WAIT_DIRTY
50+
4851
log_must set_tunable64 TXG_TIMEOUT 5000
4952

5053
log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
5154

5255
log_must sync_pool $TESTPOOL true
5356

57+
# Verify fallback to copy when there are dirty blocks
58+
log_must set_tunable32 BCLONE_WAIT_DIRTY 0
59+
5460
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
5561
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288
5662

@@ -61,5 +67,20 @@ log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone
6167
typeset blocks=$(get_same_blocks $TESTPOOL file $TESTPOOL clone)
6268
log_must [ "$blocks" = "" ]
6369

70+
log_must rm /$TESTPOOL/file /$TESTPOOL/clone
71+
72+
# Verify blocks are cloned even when there are dirty blocks
73+
log_must set_tunable32 BCLONE_WAIT_DIRTY 1
74+
75+
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
76+
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288
77+
78+
log_must sync_pool $TESTPOOL
79+
80+
log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone
81+
82+
typeset blocks=$(get_same_blocks $TESTPOOL file $TESTPOOL clone)
83+
log_must [ "$blocks" = "0 1 2 3" ]
84+
6485
log_pass $claim
6586

tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ function cleanup
5656
{
5757
datasetexists $TESTPOOL/cp-reflink && \
5858
destroy_dataset $$TESTPOOL/cp-reflink -f
59-
log_must set_tunable32 BCLONE_WAIT_DIRTY 0
59+
log_must restore_tunable BCLONE_WAIT_DIRTY
6060
}
6161

6262
function verify_copy
@@ -81,6 +81,8 @@ SRC_SIZE=$((1024 + $RANDOM % 1024))
8181
# A smaller recordsize is used merely to speed up the test.
8282
RECORDSIZE=4096
8383

84+
log_must save_tunable BCLONE_WAIT_DIRTY
85+
8486
log_must zfs create -o recordsize=$RECORDSIZE $TESTPOOL/cp-reflink
8587
CP_TESTDIR=$(get_prop mountpoint $TESTPOOL/cp-reflink)
8688

0 commit comments

Comments
 (0)