Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions config/kernel-dentry-operations.m4
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,37 @@ AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP], [
])
])

dnl #
dnl # 6.17 API change
dnl # sb->s_d_op removed; set_default_d_op(sb, dop) added
dnl #
dnl # Registers a compile test named set_default_d_op that includes
dnl # <linux/dcache.h> and calls set_default_d_op(NULL, NULL). The outcome is
dnl # consumed by ZFS_AC_KERNEL_SET_DEFAULT_D_OP.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_DEFAULT_D_OP], [
ZFS_LINUX_TEST_SRC([set_default_d_op], [
#include <linux/dcache.h>
], [
set_default_d_op(NULL, NULL);
])
])

dnl #
dnl # Reports the result of the set_default_d_op compile test and defines
dnl # HAVE_SET_DEFAULT_D_OP when the test passed.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SET_DEFAULT_D_OP], [
AC_MSG_CHECKING([whether set_default_d_op() is available])
ZFS_LINUX_TEST_RESULT([set_default_d_op], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SET_DEFAULT_D_OP, 1,
[Define if set_default_d_op() is available])
], [
AC_MSG_RESULT(no)
])
])

dnl #
dnl # Aggregate macro: expands the test-source (_SRC_) macro of each
dnl # dentry-related check.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY], [
ZFS_AC_KERNEL_SRC_D_OBTAIN_ALIAS
ZFS_AC_KERNEL_SRC_D_SET_D_OP
ZFS_AC_KERNEL_SRC_SET_DEFAULT_D_OP
])

dnl #
dnl # Aggregate macro: expands the result macro of each dentry-related check.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_DENTRY], [
ZFS_AC_KERNEL_D_OBTAIN_ALIAS
ZFS_AC_KERNEL_D_SET_D_OP
ZFS_AC_KERNEL_SET_DEFAULT_D_OP
])
1 change: 1 addition & 0 deletions include/os/linux/zfs/sys/zpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ extern const struct file_operations zpl_dir_file_operations;
extern void zpl_prune_sb(uint64_t nr_to_scan, void *arg);

extern const struct super_operations zpl_super_operations;
extern const struct dentry_operations zpl_dentry_operations;
extern const struct export_operations zpl_export_operations;
extern struct file_system_type zpl_fs_type;

Expand Down
45 changes: 44 additions & 1 deletion man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
.\" own identifying information:
.\" Portions Copyright [yyyy] [name of copyright owner]
.\"
.Dd August 14, 2025
.Dd September 15, 2025
.Dt ZFS 4
.Os
.
Expand Down Expand Up @@ -2583,6 +2583,49 @@ the xattr so as to not accumulate duplicates.
.It Sy zio_requeue_io_start_cut_in_line Ns = Ns Sy 0 Ns | Ns 1 Pq int
Prioritize requeued I/O.
.
.It Sy zfs_delete_inode Ns = Ns Sy 0 Ns | Ns 1 Pq int
Sets whether the kernel should free an inode structure when the last reference
is released, or cache it in memory.
Intended for testing/debugging.
.Pp
A live inode structure "pins" various internal OpenZFS structures in memory,
which can result in large amounts of "unusable" memory on systems with lots of
infrequently-accessed files, until the kernel's memory pressure mechanism
asks OpenZFS to release them.
.Pp
The default value of
.Sy 0
always caches inodes that appear to still exist on disk.
Setting it to
.Sy 1
will immediately release unused inodes and their associated memory back to the
dbuf cache or the ARC for reuse, but may reduce performance if inodes are
frequently evicted and reloaded.
.Pp
This parameter is only available on Linux.
.
.It Sy zfs_delete_dentry Ns = Ns Sy 0 Ns | Ns 1 Pq int
Sets whether the kernel should free a dentry structure when it is no longer
required, or hold it in the dentry cache.
Intended for testing/debugging.
.Pp
Since a dentry structure holds an inode reference, a cached dentry can "pin"
an inode in memory indefinitely, along with associated OpenZFS structures (See
.Sy zfs_delete_inode ) .
.Pp
The default value of
.Sy 0
instructs the kernel to cache entries and their associated inodes when they
are no longer directly referenced.
They will be reclaimed as part of the kernel's normal cache management
processes.
Setting it to
.Sy 1
will instruct the kernel to release directory entries and their inodes as soon
as they are no longer referenced by the filesystem.
.Pp
This parameter is only available on Linux.
.
.It Sy zio_taskq_batch_pct Ns = Ns Sy 80 Ns % Pq uint
Percentage of online CPUs which will run a worker thread for I/O.
These workers are responsible for I/O work such as compression, encryption,
Expand Down
6 changes: 6 additions & 0 deletions module/os/linux/zfs/zfs_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1556,6 +1556,12 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
sb->s_xattr = zpl_xattr_handlers;
sb->s_export_op = &zpl_export_operations;

#ifdef HAVE_SET_DEFAULT_D_OP
set_default_d_op(sb, &zpl_dentry_operations);
#else
sb->s_d_op = &zpl_dentry_operations;
#endif

/* Set features for file system. */
zfs_set_fuid_feature(zfsvfs);

Expand Down
85 changes: 81 additions & 4 deletions module/os/linux/zfs/zpl_super.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
/*
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
* Copyright (c) 2023, Datto Inc. All rights reserved.
* Copyright (c) 2025, Klara, Inc.
*/


Expand All @@ -33,6 +34,20 @@
#include <linux/iversion.h>
#include <linux/version.h>

/*
 * Tunable: what to do when the last reference to an inode is released. If 0
 * (the default), zpl_drop_inode() defers to generic_drop_inode() and the
 * kernel may cache the inode on the superblock. If non-zero, zpl_drop_inode()
 * calls generic_delete_inode() so the inode is freed immediately.
 * Registered as the zfs_delete_inode module parameter.
 */
int zfs_delete_inode = 0;

/*
 * Tunable: what to do when the last reference to a dentry is released. If 0
 * (the default), zpl_dentry_delete() returns 0 and the kernel keeps the
 * dentry in its cache. If non-zero, zpl_dentry_delete() returns 1, marking
 * the dentry for cleanup, at which time its inode reference will be released.
 * Registered as the zfs_delete_dentry module parameter.
 */
int zfs_delete_dentry = 0;

static struct inode *
zpl_inode_alloc(struct super_block *sb)
Expand Down Expand Up @@ -77,11 +92,36 @@ zpl_dirty_inode(struct inode *ip, int flags)
}

/*
* When ->drop_inode() is called its return value indicates if the
* inode should be evicted from the inode cache. If the inode is
* unhashed and has no links the default policy is to evict it
* immediately.
* ->drop_inode() is called when the last reference to an inode is released.
* Its return value indicates if the inode should be destroyed immediately, or
* cached on the superblock structure.
*
* By default (zfs_delete_inode=0), we call generic_drop_inode(), which returns
* "destroy immediately" if the inode is unhashed and has no links (roughly: no
* longer exists on disk). On datasets with millions of rarely-accessed files,
* this can cause a large amount of memory to be "pinned" by cached inodes,
* which in turn pin their associated dnodes and dbufs, until the kernel starts
* reporting memory pressure and requests OpenZFS release some memory (see
* zfs_prune()).
*
* When set to 1, we call generic_delete_inode(), which always returns "destroy
* immediately", resulting in inodes being destroyed immediately, releasing
* their associated dnodes and dbufs to the dbuf cache and the ARC to be
* evicted as normal.
*
* Note that the "last reference" doesn't always mean the last _userspace_
* reference; the dentry cache also holds a reference, so "busy" inodes will
* still be kept alive that way (subject to dcache tuning).
*/
static int
zpl_drop_inode(struct inode *ip)
{
	/*
	 * A non-zero zfs_delete_inode selects generic_delete_inode();
	 * otherwise the decision is left to generic_drop_inode().
	 */
	return (zfs_delete_inode ? generic_delete_inode(ip) :
	    generic_drop_inode(ip));
}

/*
* The ->evict_inode() callback must minimally truncate the inode pages,
* and call clear_inode(). For 2.6.35 and later kernels this will
* simply update the inode state, with the sync occurring before the
Expand Down Expand Up @@ -470,6 +510,7 @@ const struct super_operations zpl_super_operations = {
.destroy_inode = zpl_inode_destroy,
.dirty_inode = zpl_dirty_inode,
.write_inode = NULL,
.drop_inode = zpl_drop_inode,
.evict_inode = zpl_evict_inode,
.put_super = zpl_put_super,
.sync_fs = zpl_sync_fs,
Expand All @@ -480,6 +521,35 @@ const struct super_operations zpl_super_operations = {
.show_stats = NULL,
};

/*
* ->d_delete() is called when the last reference to a dentry is released. Its
* return value indicates if the dentry should be destroyed immediately, or
* retained in the dentry cache.
*
* By default (zfs_delete_dentry=0) the kernel will always cache unused
* entries. Each dentry holds an inode reference, so cached dentries can hold
* the final inode reference indefinitely, leading to the inode and its related
* data being pinned (see zpl_drop_inode()).
*
* When set to 1, we signal that the dentry should be destroyed immediately and
* never cached. This reduces memory usage, at the cost of higher overheads to
* lookup a file, as the inode and its underlying data (dnode/dbuf) need to be
* reloaded and reinflated.
*
* Note that userspace does not have direct control over dentry references and
* reclaim; rather, this is part of the kernel's caching and reclaim subsystems
* (eg vm.vfs_cache_pressure).
*/
static int
zpl_dentry_delete(const struct dentry *dentry)
{
	/* Collapse the tunable to the 0/1 answer returned from ->d_delete(). */
	if (zfs_delete_dentry != 0)
		return (1);
	return (0);
}

/*
 * Dentry operations for ZPL. Only ->d_delete() is provided; it is driven by
 * the zfs_delete_dentry tunable (see zpl_dentry_delete()).
 */
const struct dentry_operations zpl_dentry_operations = {
.d_delete = zpl_dentry_delete,
};

struct file_system_type zpl_fs_type = {
.owner = THIS_MODULE,
.name = ZFS_DRIVER,
Expand All @@ -491,3 +561,10 @@ struct file_system_type zpl_fs_type = {
.mount = zpl_mount,
.kill_sb = zpl_kill_sb,
};

/*
 * Register the zfs_delete_inode and zfs_delete_dentry tunables as INT module
 * parameters with ZMOD_RW access (presumably read-write at runtime; confirm
 * against the ZFS_MODULE_PARAM definition).
 */
ZFS_MODULE_PARAM(zfs, zfs_, delete_inode, INT, ZMOD_RW,
"Delete inodes as soon as the last reference is released.");

ZFS_MODULE_PARAM(zfs, zfs_, delete_dentry, INT, ZMOD_RW,
"Delete dentries from dentry cache as soon as the last reference is "
"released.");
Loading