Skip to content

Commit 010893b

Browse files
tuxokodavidchenntnx
authored andcommitted
Define sops->free_inode() to prevent use-after-free during lookup
On Linux, when doing path lookup with LOOKUP_RCU, dentry and inode can be dereferenced without refcounts and locks. For this reason, dentry and inode must only be freed after RCU grace period. However, zfs currently frees inode in zfs_inode_destroy synchronously and we can't use GPL-only call_rcu() in zfs directly. Fortunately, on Linux 5.2 and after, if we define sops->free_inode(), the kernel will do call_rcu() for us. This issue may be triggered more easily with init_on_free=1 boot parameter: BUG: kernel NULL pointer dereference, address: 0000000000000020 RIP: 0010:selinux_inode_permission+0x10e/0x1c0 Call Trace: ? show_trace_log_lvl+0x1be/0x2d9 ? show_trace_log_lvl+0x1be/0x2d9 ? show_trace_log_lvl+0x1be/0x2d9 ? security_inode_permission+0x37/0x60 ? __die_body.cold+0x8/0xd ? no_context+0x113/0x220 ? exc_page_fault+0x6d/0x130 ? asm_exc_page_fault+0x1e/0x30 ? selinux_inode_permission+0x10e/0x1c0 security_inode_permission+0x37/0x60 link_path_walk.part.0.constprop.0+0xb5/0x360 ? path_init+0x27d/0x3c0 path_lookupat+0x3e/0x1a0 filename_lookup+0xc0/0x1d0 ? __check_object_size.part.0+0x123/0x150 ? strncpy_from_user+0x4e/0x130 ? getname_flags.part.0+0x4b/0x1c0 vfs_statx+0x72/0x120 ? ioctl_has_perm.constprop.0.isra.0+0xbd/0x120 __do_sys_newlstat+0x39/0x70 ? __x64_sys_ioctl+0x8d/0xd0 do_syscall_64+0x30/0x40 entry_SYSCALL_64_after_hwframe+0x62/0xc7 Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Alexander Motin <[email protected]> Reviewed-by: Rob Norris <[email protected]> Signed-off-by: Chunwei Chen <[email protected]> Co-authored-by: Chunwei Chen <[email protected]> Closes openzfs#17546
1 parent 7e4351f commit 010893b

File tree

5 files changed

+54
-2
lines changed

5 files changed

+54
-2
lines changed

config/kernel-free-inode.m4

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
dnl #
2+
dnl # Linux 5.2 API change
3+
dnl #
4+
AC_DEFUN([ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE], [
5+
ZFS_LINUX_TEST_SRC([super_operations_free_inode], [
6+
#include <linux/fs.h>
7+
8+
static void free_inode(struct inode *) { }
9+
10+
static struct super_operations sops __attribute__ ((unused)) = {
11+
.free_inode = free_inode,
12+
};
13+
],[])
14+
])
15+
16+
AC_DEFUN([ZFS_AC_KERNEL_SOPS_FREE_INODE], [
17+
AC_MSG_CHECKING([whether sops->free_inode() exists])
18+
ZFS_LINUX_TEST_RESULT([super_operations_free_inode], [
19+
AC_MSG_RESULT(yes)
20+
AC_DEFINE(HAVE_SOPS_FREE_INODE, 1, [sops->free_inode() exists])
21+
],[
22+
AC_MSG_RESULT(no)
23+
])
24+
])

config/kernel.m4

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
132132
ZFS_AC_KERNEL_SRC_PIN_USER_PAGES
133133
ZFS_AC_KERNEL_SRC_TIMER
134134
ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR
135+
ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE
135136
case "$host_cpu" in
136137
powerpc*)
137138
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -248,6 +249,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
248249
ZFS_AC_KERNEL_PIN_USER_PAGES
249250
ZFS_AC_KERNEL_TIMER
250251
ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR
252+
ZFS_AC_KERNEL_SOPS_FREE_INODE
251253
case "$host_cpu" in
252254
powerpc*)
253255
ZFS_AC_KERNEL_CPU_HAS_FEATURE

include/os/linux/zfs/sys/zfs_znode_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ struct znode;
157157

158158
extern int zfs_sync(struct super_block *, int, cred_t *);
159159
extern int zfs_inode_alloc(struct super_block *, struct inode **ip);
160+
extern void zfs_inode_free(struct inode *);
160161
extern void zfs_inode_destroy(struct inode *);
161162
extern void zfs_mark_inode_dirty(struct inode *);
162163
extern boolean_t zfs_relatime_need_update(const struct inode *);

module/os/linux/zfs/zfs_znode_os.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,12 @@ zfs_inode_alloc(struct super_block *sb, struct inode **ip)
371371
return (0);
372372
}
373373

374+
void
375+
zfs_inode_free(struct inode *ip)
376+
{
377+
kmem_cache_free(znode_cache, ITOZ(ip));
378+
}
379+
374380
/*
375381
* Called in multiple places when an inode should be destroyed.
376382
*/
@@ -395,8 +401,15 @@ zfs_inode_destroy(struct inode *ip)
395401
nvlist_free(zp->z_xattr_cached);
396402
zp->z_xattr_cached = NULL;
397403
}
398-
399-
kmem_cache_free(znode_cache, zp);
404+
#ifndef HAVE_SOPS_FREE_INODE
405+
/*
406+
* inode needs to be freed in RCU callback. If we have
407+
* super_operations->free_inode, Linux kernel will do call_rcu
408+
* for us. But if we don't have it, since call_rcu is GPL-only
409+
* symbol, we can only free synchronously and accept the risk.
410+
*/
411+
zfs_inode_free(ip);
412+
#endif
400413
}
401414

402415
static void

module/os/linux/zfs/zpl_super.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ zpl_inode_alloc(struct super_block *sb)
4545
return (ip);
4646
}
4747

48+
#ifdef HAVE_SOPS_FREE_INODE
49+
static void
50+
zpl_inode_free(struct inode *ip)
51+
{
52+
ASSERT(atomic_read(&ip->i_count) == 0);
53+
zfs_inode_free(ip);
54+
}
55+
#endif
56+
4857
static void
4958
zpl_inode_destroy(struct inode *ip)
5059
{
@@ -458,6 +467,9 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg)
458467

459468
const struct super_operations zpl_super_operations = {
460469
.alloc_inode = zpl_inode_alloc,
470+
#ifdef HAVE_SOPS_FREE_INODE
471+
.free_inode = zpl_inode_free,
472+
#endif
461473
.destroy_inode = zpl_inode_destroy,
462474
.dirty_inode = zpl_dirty_inode,
463475
.write_inode = NULL,

0 commit comments

Comments
 (0)