@@ -1200,7 +1200,8 @@ dbuf_verify(dmu_buf_impl_t *db)
12001200 if ((db -> db_blkptr == NULL || BP_IS_HOLE (db -> db_blkptr )) &&
12011201 (db -> db_buf == NULL || db -> db_buf -> b_data ) &&
12021202 db -> db .db_data && db -> db_blkid != DMU_BONUS_BLKID &&
1203- db -> db_state != DB_FILL && (dn == NULL || !dn -> dn_free_txg )) {
1203+ db -> db_state != DB_FILL && (dn == NULL || !dn -> dn_free_txg ) &&
1204+ RW_LOCK_HELD (& db -> db_rwlock )) {
12041205 /*
12051206 * If the blkptr isn't set but they have nonzero data,
12061207 * it had better be dirty, otherwise we'll lose that
@@ -1704,7 +1705,9 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
17041705 int bonuslen = DN_SLOTS_TO_BONUSLEN (dn -> dn_num_slots );
17051706 dr -> dt .dl .dr_data = kmem_alloc (bonuslen , KM_SLEEP );
17061707 arc_space_consume (bonuslen , ARC_SPACE_BONUS );
1708+ rw_enter (& db -> db_rwlock , RW_READER );
17071709 memcpy (dr -> dt .dl .dr_data , db -> db .db_data , bonuslen );
1710+ rw_exit (& db -> db_rwlock );
17081711 } else if (zfs_refcount_count (& db -> db_holds ) > db -> db_dirtycnt ) {
17091712 dnode_t * dn = DB_DNODE (db );
17101713 int size = arc_buf_size (db -> db_buf );
@@ -1734,7 +1737,9 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
17341737 } else {
17351738 dr -> dt .dl .dr_data = arc_alloc_buf (spa , db , type , size );
17361739 }
1740+ rw_enter (& db -> db_rwlock , RW_READER );
17371741 memcpy (dr -> dt .dl .dr_data -> b_data , db -> db .db_data , size );
1742+ rw_exit (& db -> db_rwlock );
17381743 } else {
17391744 db -> db_buf = NULL ;
17401745 dbuf_clear_data (db );
@@ -3006,7 +3011,9 @@ dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed)
30063011 ASSERT (db -> db_blkid != DMU_BONUS_BLKID );
30073012 /* we were freed while filling */
30083013 /* XXX dbuf_undirty? */
3014+ rw_enter (& db -> db_rwlock , RW_WRITER );
30093015 memset (db -> db .db_data , 0 , db -> db .db_size );
3016+ rw_exit (& db -> db_rwlock );
30103017 db -> db_freed_in_flight = FALSE;
30113018 db -> db_state = DB_CACHED ;
30123019 DTRACE_SET_STATE (db ,
@@ -3381,12 +3388,14 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
33813388 * parentp = NULL ;
33823389 return (err );
33833390 }
3391+ mutex_enter (& (* parentp )-> db_mtx );
33843392 rw_enter (& (* parentp )-> db_rwlock , RW_READER );
33853393 * bpp = ((blkptr_t * )(* parentp )-> db .db_data ) +
33863394 (blkid & ((1ULL << epbs ) - 1 ));
33873395 if (blkid > (dn -> dn_phys -> dn_maxblkid >> (level * epbs )))
33883396 ASSERT (BP_IS_HOLE (* bpp ));
33893397 rw_exit (& (* parentp )-> db_rwlock );
3398+ mutex_exit (& (* parentp )-> db_mtx );
33903399 return (0 );
33913400 } else {
33923401 /* the block is referenced from the dnode */
@@ -4570,10 +4579,12 @@ dbuf_lightweight_bp(dbuf_dirty_record_t *dr)
45704579 return (& dn -> dn_phys -> dn_blkptr [dr -> dt .dll .dr_blkid ]);
45714580 } else {
45724581 dmu_buf_impl_t * parent_db = dr -> dr_parent -> dr_dbuf ;
4582+ ASSERT (MUTEX_HELD (& parent_db -> db_mtx ));
45734583 int epbs = dn -> dn_indblkshift - SPA_BLKPTRSHIFT ;
45744584 VERIFY3U (parent_db -> db_level , = = , 1 );
45754585 VERIFY3P (DB_DNODE (parent_db ), = = , dn );
45764586 VERIFY3U (dr -> dt .dll .dr_blkid >> epbs , = = , parent_db -> db_blkid );
4587+ ASSERT (RW_LOCK_HELD (& parent_db -> db_rwlock ));
45774588 blkptr_t * bp = parent_db -> db .db_data ;
45784589 return (& bp [dr -> dt .dll .dr_blkid & ((1 << epbs ) - 1 )]);
45794590 }
@@ -4584,12 +4595,22 @@ dbuf_lightweight_ready(zio_t *zio)
45844595{
45854596 dbuf_dirty_record_t * dr = zio -> io_private ;
45864597 blkptr_t * bp = zio -> io_bp ;
4598+ dmu_buf_impl_t * parent_db = NULL ;
45874599
45884600 if (zio -> io_error != 0 )
45894601 return ;
45904602
45914603 dnode_t * dn = dr -> dr_dnode ;
45924604
4605+ EQUIV (dr -> dr_parent == NULL , dn -> dn_phys -> dn_nlevels == 1 );
4606+ if (dr -> dr_parent == NULL ) {
4607+ parent_db = dn -> dn_dbuf ;
4608+ } else {
4609+ parent_db = dr -> dr_parent -> dr_dbuf ;
4610+ }
4611+ mutex_enter (& parent_db -> db_mtx );
4612+
4613+ rw_enter (& parent_db -> db_rwlock , RW_READER );
45934614 blkptr_t * bp_orig = dbuf_lightweight_bp (dr );
45944615 spa_t * spa = dmu_objset_spa (dn -> dn_objset );
45954616 int64_t delta = bp_get_dsize_sync (spa , bp ) -
@@ -4609,16 +4630,13 @@ dbuf_lightweight_ready(zio_t *zio)
46094630 BP_SET_FILL (bp , fill );
46104631 }
46114632
4612- dmu_buf_impl_t * parent_db ;
4613- EQUIV (dr -> dr_parent == NULL , dn -> dn_phys -> dn_nlevels == 1 );
4614- if (dr -> dr_parent == NULL ) {
4615- parent_db = dn -> dn_dbuf ;
4616- } else {
4617- parent_db = dr -> dr_parent -> dr_dbuf ;
4633+ if (!rw_tryupgrade (& parent_db -> db_rwlock )) {
4634+ rw_exit (& parent_db -> db_rwlock );
4635+ rw_enter (& parent_db -> db_rwlock , RW_WRITER );
46184636 }
4619- rw_enter (& parent_db -> db_rwlock , RW_WRITER );
46204637 * bp_orig = * bp ;
46214638 rw_exit (& parent_db -> db_rwlock );
4639+ mutex_exit (& parent_db -> db_mtx );
46224640}
46234641
46244642static void
@@ -4650,6 +4668,7 @@ noinline static void
46504668dbuf_sync_lightweight (dbuf_dirty_record_t * dr , dmu_tx_t * tx )
46514669{
46524670 dnode_t * dn = dr -> dr_dnode ;
4671+ dmu_buf_impl_t * parent_db = NULL ;
46534672 zio_t * pio ;
46544673 if (dn -> dn_phys -> dn_nlevels == 1 ) {
46554674 pio = dn -> dn_zio ;
@@ -4668,6 +4687,11 @@ dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
46684687 * See comment in dbuf_write(). This is so that zio->io_bp_orig
46694688 * will have the old BP in dbuf_lightweight_done().
46704689 */
4690+ if (dr -> dr_dnode -> dn_phys -> dn_nlevels != 1 ) {
4691+ parent_db = dr -> dr_parent -> dr_dbuf ;
4692+ mutex_enter (& parent_db -> db_mtx );
4693+ rw_enter (& parent_db -> db_rwlock , RW_READER );
4694+ }
46714695 dr -> dr_bp_copy = * dbuf_lightweight_bp (dr );
46724696
46734697 dr -> dr_zio = zio_write (pio , dmu_objset_spa (dn -> dn_objset ),
@@ -4677,6 +4701,11 @@ dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
46774701 dbuf_lightweight_done , dr , ZIO_PRIORITY_ASYNC_WRITE ,
46784702 ZIO_FLAG_MUSTSUCCEED | dr -> dt .dll .dr_flags , & zb );
46794703
4704+ if (parent_db ) {
4705+ rw_exit (& parent_db -> db_rwlock );
4706+ mutex_exit (& parent_db -> db_mtx );
4707+ }
4708+
46804709 zio_nowait (dr -> dr_zio );
46814710}
46824711
@@ -4833,7 +4862,9 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
48334862 } else {
48344863 * datap = arc_alloc_buf (os -> os_spa , db , type , psize );
48354864 }
4865+ rw_enter (& db -> db_rwlock , RW_READER );
48364866 memcpy ((* datap )-> b_data , db -> db .db_data , psize );
4867+ rw_exit (& db -> db_rwlock );
48374868 }
48384869 db -> db_data_pending = dr ;
48394870
@@ -4939,6 +4970,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49394970
49404971 if (dn -> dn_type == DMU_OT_DNODE ) {
49414972 i = 0 ;
4973+ rw_enter (& db -> db_rwlock , RW_READER );
49424974 while (i < db -> db .db_size ) {
49434975 dnode_phys_t * dnp =
49444976 (void * )(((char * )db -> db .db_data ) + i );
@@ -4964,6 +4996,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49644996 DNODE_MIN_SIZE ;
49654997 }
49664998 }
4999+ rw_exit (& db -> db_rwlock );
49675000 } else {
49685001 if (BP_IS_HOLE (bp )) {
49695002 fill = 0 ;
@@ -4972,6 +5005,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49725005 }
49735006 }
49745007 } else {
5008+ rw_enter (& db -> db_rwlock , RW_READER );
49755009 blkptr_t * ibp = db -> db .db_data ;
49765010 ASSERT3U (db -> db .db_size , = = , 1 <<dn -> dn_phys -> dn_indblkshift );
49775011 for (i = db -> db .db_size >> SPA_BLKPTRSHIFT ; i > 0 ; i -- , ibp ++ ) {
@@ -4981,6 +5015,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49815015 BLK_CONFIG_SKIP , BLK_VERIFY_HALT );
49825016 fill += BP_GET_FILL (ibp );
49835017 }
5018+ rw_exit (& db -> db_rwlock );
49845019 }
49855020 DB_DNODE_EXIT (db );
49865021
@@ -5015,6 +5050,8 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
50155050 DB_DNODE_EXIT (db );
50165051 ASSERT3U (epbs , < , 31 );
50175052
5053+ mutex_enter (& db -> db_mtx );
5054+ rw_enter (& db -> db_rwlock , RW_READER );
50185055 /* Determine if all our children are holes */
50195056 for (i = 0 , bp = db -> db .db_data ; i < 1ULL << epbs ; i ++ , bp ++ ) {
50205057 if (!BP_IS_HOLE (bp ))
@@ -5031,10 +5068,14 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
50315068 * anybody from reading the blocks we're about to
50325069 * zero out.
50335070 */
5034- rw_enter (& db -> db_rwlock , RW_WRITER );
5071+ if (!rw_tryupgrade (& db -> db_rwlock )) {
5072+ rw_exit (& db -> db_rwlock );
5073+ rw_enter (& db -> db_rwlock , RW_WRITER );
5074+ }
50355075 memset (db -> db .db_data , 0 , db -> db .db_size );
5036- rw_exit (& db -> db_rwlock );
50375076 }
5077+ rw_exit (& db -> db_rwlock );
5078+ mutex_exit (& db -> db_mtx );
50385079}
50395080
50405081static void
@@ -5230,11 +5271,11 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
52305271 * avoid lock contention, only grab it when we are actually
52315272 * changing the BP.
52325273 */
5233- if (rw != NULL )
5274+ if (rw != NULL && !rw_tryupgrade (rw )) {
5275+ rw_exit (rw );
52345276 rw_enter (rw , RW_WRITER );
5277+ }
52355278 * bp = bp_copy ;
5236- if (rw != NULL )
5237- rw_exit (rw );
52385279 }
52395280}
52405281
@@ -5250,6 +5291,8 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
52505291 if (!spa_feature_is_active (spa , SPA_FEATURE_DEVICE_REMOVAL ))
52515292 return ;
52525293
5294+ mutex_enter (& db -> db_mtx );
5295+ rw_enter (& db -> db_rwlock , RW_READER );
52535296 if (db -> db_level > 0 ) {
52545297 blkptr_t * bp = db -> db .db_data ;
52555298 for (int i = 0 ; i < db -> db .db_size >> SPA_BLKPTRSHIFT ; i ++ ) {
@@ -5268,6 +5311,8 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
52685311 }
52695312 }
52705313 }
5314+ rw_exit (& db -> db_rwlock );
5315+ mutex_exit (& db -> db_mtx );
52715316}
52725317
52735318
0 commit comments