Skip to content

Commit 6f2a475

Browse files
committed
Fix inode eviction and sync writeback deadlock on Linux
If inode eviction and sync writeback happen on the same inode at the same time, inode eviction will set I_FREEING and wait for sync writeback, while sync writeback may eventually call zfs_get_data and loop in zfs_zget forever because igrab cannot succeed while I_FREEING is set, thus causing a deadlock. To fix this, zfs_get_data now calls a variant of zfs_zget that bails out of the retry loop if the I_SYNC flag is set, forcing the caller to wait for txg sync. Signed-off-by: Chunwei Chen <[email protected]> Fixes #7964 Fixes #9430
1 parent 676b7ef commit 6f2a475

File tree

4 files changed

+40
-3
lines changed

4 files changed

+40
-3
lines changed

include/sys/zfs_znode.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ extern int zfs_znode_hold_compare(const void *, const void *);
278278
extern znode_hold_t *zfs_znode_hold_enter(zfsvfs_t *, uint64_t);
279279
extern void zfs_znode_hold_exit(zfsvfs_t *, znode_hold_t *);
280280
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
281+
extern int zfs_zget_impl(zfsvfs_t *, uint64_t, znode_t **, boolean_t);
281282
extern int zfs_rezget(znode_t *);
282283
extern void zfs_zinactive(znode_t *);
283284
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);

module/os/freebsd/zfs/zfs_znode_os.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,6 +1051,13 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
10511051
return (err);
10521052
}
10531053

1054+
int
1055+
zfs_zget_impl(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp,
1056+
boolean_t check_sync)
1057+
{
1058+
return (zfs_zget(zfsvfs, obj_num, zpp));
1059+
}
1060+
10541061
int
10551062
zfs_rezget(znode_t *zp)
10561063
{

module/os/linux/zfs/zfs_znode_os.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,13 +1044,21 @@ zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
10441044

10451045
int
10461046
zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
1047+
{
1048+
return (zfs_zget_impl(zfsvfs, obj_num, zpp, B_FALSE));
1049+
}
1050+
1051+
int
1052+
zfs_zget_impl(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp,
1053+
boolean_t check_sync)
10471054
{
10481055
dmu_object_info_t doi;
10491056
dmu_buf_t *db;
10501057
znode_t *zp;
10511058
znode_hold_t *zh;
10521059
int err;
10531060
sa_handle_t *hdl;
1061+
boolean_t noloop = B_FALSE;
10541062

10551063
*zpp = NULL;
10561064

@@ -1109,8 +1117,18 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
11091117
if (igrab(ZTOI(zp)) == NULL) {
11101118
if (zp->z_unlinked)
11111119
err = SET_ERROR(ENOENT);
1112-
else
1120+
else {
11131121
err = SET_ERROR(EAGAIN);
1122+
/*
1123+
* In writeback path, I_SYNC flag will be set
1124+
* and block inode eviction. So we must not
1125+
* loop doing igrab in possible writeback
1126+
* path, i.e. zfs_get_data, if inode is being
1127+
* evicted and I_SYNC is also set.
1128+
*/
1129+
if (check_sync && (ZTOI(zp)->i_state & I_SYNC))
1130+
noloop = B_TRUE;
1131+
}
11141132
} else {
11151133
*zpp = zp;
11161134
err = 0;
@@ -1120,7 +1138,7 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
11201138
sa_buf_rele(db, NULL);
11211139
zfs_znode_hold_exit(zfsvfs, zh);
11221140

1123-
if (err == EAGAIN) {
1141+
if (err == EAGAIN && !noloop) {
11241142
/* inode might need this to finish evict */
11251143
cond_resched();
11261144
goto again;

module/zfs/zfs_vnops.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1149,10 +1149,21 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
11491149
ASSERT3P(lwb, !=, NULL);
11501150
ASSERT3U(size, !=, 0);
11511151

1152+
error = zfs_zget_impl(zfsvfs, object, &zp, B_TRUE);
1153+
#if defined(__linux__)
1154+
/*
1155+
* Under Linux, EAGAIN indicates the inode is being evicted and I_SYNC
1156+
* is also set possibly blocking eviction, so we can't loop in
1157+
* zfs_zget to avoid deadlock. Return EIO to force txg sync under such
1158+
* scenario.
1159+
*/
1160+
if (error == EAGAIN)
1161+
return (SET_ERROR(EIO));
1162+
#endif
11521163
/*
11531164
* Nothing to do if the file has been removed
11541165
*/
1155-
if (zfs_zget(zfsvfs, object, &zp) != 0)
1166+
if (error)
11561167
return (SET_ERROR(ENOENT));
11571168
if (zp->z_unlinked) {
11581169
/*

0 commit comments

Comments
 (0)