@@ -1193,7 +1193,8 @@ dbuf_verify(dmu_buf_impl_t *db)
11931193 if ((db -> db_blkptr == NULL || BP_IS_HOLE (db -> db_blkptr )) &&
11941194 (db -> db_buf == NULL || db -> db_buf -> b_data ) &&
11951195 db -> db .db_data && db -> db_blkid != DMU_BONUS_BLKID &&
1196- db -> db_state != DB_FILL && (dn == NULL || !dn -> dn_free_txg )) {
1196+ db -> db_state != DB_FILL && (dn == NULL || !dn -> dn_free_txg ) &&
1197+ RW_LOCK_HELD (& db -> db_rwlock )) {
11971198 /*
11981199 * If the blkptr isn't set but they have nonzero data,
11991200 * it had better be dirty, otherwise we'll lose that
@@ -1697,7 +1698,9 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
16971698 int bonuslen = DN_SLOTS_TO_BONUSLEN (dn -> dn_num_slots );
16981699 dr -> dt .dl .dr_data = kmem_alloc (bonuslen , KM_SLEEP );
16991700 arc_space_consume (bonuslen , ARC_SPACE_BONUS );
1701+ rw_enter (& db -> db_rwlock , RW_READER );
17001702 memcpy (dr -> dt .dl .dr_data , db -> db .db_data , bonuslen );
1703+ rw_exit (& db -> db_rwlock );
17011704 } else if (zfs_refcount_count (& db -> db_holds ) > db -> db_dirtycnt ) {
17021705 dnode_t * dn = DB_DNODE (db );
17031706 int size = arc_buf_size (db -> db_buf );
@@ -1727,7 +1730,9 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
17271730 } else {
17281731 dr -> dt .dl .dr_data = arc_alloc_buf (spa , db , type , size );
17291732 }
1733+ rw_enter (& db -> db_rwlock , RW_READER );
17301734 memcpy (dr -> dt .dl .dr_data -> b_data , db -> db .db_data , size );
1735+ rw_exit (& db -> db_rwlock );
17311736 } else {
17321737 db -> db_buf = NULL ;
17331738 dbuf_clear_data (db );
@@ -2999,7 +3004,9 @@ dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed)
29993004 ASSERT (db -> db_blkid != DMU_BONUS_BLKID );
30003005 /* we were freed while filling */
30013006 /* XXX dbuf_undirty? */
3007+ rw_enter (& db -> db_rwlock , RW_WRITER );
30023008 memset (db -> db .db_data , 0 , db -> db .db_size );
3009+ rw_exit (& db -> db_rwlock );
30033010 db -> db_freed_in_flight = FALSE;
30043011 db -> db_state = DB_CACHED ;
30053012 DTRACE_SET_STATE (db ,
@@ -3374,8 +3381,10 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
33743381 * parentp = NULL ;
33753382 return (err );
33763383 }
3384+ mutex_enter (& (* parentp )-> db_mtx );
33773385 * bpp = ((blkptr_t * )(* parentp )-> db .db_data ) +
33783386 (blkid & ((1ULL << epbs ) - 1 ));
3387+ mutex_exit (& (* parentp )-> db_mtx );
33793388 return (0 );
33803389 } else {
33813390 /* the block is referenced from the dnode */
@@ -4560,10 +4569,12 @@ dbuf_lightweight_bp(dbuf_dirty_record_t *dr)
45604569 return (& dn -> dn_phys -> dn_blkptr [dr -> dt .dll .dr_blkid ]);
45614570 } else {
45624571 dmu_buf_impl_t * parent_db = dr -> dr_parent -> dr_dbuf ;
4572+ ASSERT (MUTEX_HELD (& parent_db -> db_mtx ));
45634573 int epbs = dn -> dn_indblkshift - SPA_BLKPTRSHIFT ;
45644574 VERIFY3U (parent_db -> db_level , = = , 1 );
45654575 VERIFY3P (DB_DNODE (parent_db ), = = , dn );
45664576 VERIFY3U (dr -> dt .dll .dr_blkid >> epbs , = = , parent_db -> db_blkid );
4577+ ASSERT (RW_LOCK_HELD (& parent_db -> db_rwlock ));
45674578 blkptr_t * bp = parent_db -> db .db_data ;
45684579 return (& bp [dr -> dt .dll .dr_blkid & ((1 << epbs ) - 1 )]);
45694580 }
@@ -4574,12 +4585,22 @@ dbuf_lightweight_ready(zio_t *zio)
45744585{
45754586 dbuf_dirty_record_t * dr = zio -> io_private ;
45764587 blkptr_t * bp = zio -> io_bp ;
4588+ dmu_buf_impl_t * parent_db = NULL ;
45774589
45784590 if (zio -> io_error != 0 )
45794591 return ;
45804592
45814593 dnode_t * dn = dr -> dr_dnode ;
45824594
4595+ EQUIV (dr -> dr_parent == NULL , dn -> dn_phys -> dn_nlevels == 1 );
4596+ if (dr -> dr_parent == NULL ) {
4597+ parent_db = dn -> dn_dbuf ;
4598+ } else {
4599+ parent_db = dr -> dr_parent -> dr_dbuf ;
4600+ }
4601+ mutex_enter (& parent_db -> db_mtx );
4602+
4603+ rw_enter (& parent_db -> db_rwlock , RW_READER );
45834604 blkptr_t * bp_orig = dbuf_lightweight_bp (dr );
45844605 spa_t * spa = dmu_objset_spa (dn -> dn_objset );
45854606 int64_t delta = bp_get_dsize_sync (spa , bp ) -
@@ -4599,16 +4620,13 @@ dbuf_lightweight_ready(zio_t *zio)
45994620 BP_SET_FILL (bp , fill );
46004621 }
46014622
4602- dmu_buf_impl_t * parent_db ;
4603- EQUIV (dr -> dr_parent == NULL , dn -> dn_phys -> dn_nlevels == 1 );
4604- if (dr -> dr_parent == NULL ) {
4605- parent_db = dn -> dn_dbuf ;
4606- } else {
4607- parent_db = dr -> dr_parent -> dr_dbuf ;
4623+ if (!rw_tryupgrade (& parent_db -> db_rwlock )) {
4624+ rw_exit (& parent_db -> db_rwlock );
4625+ rw_enter (& parent_db -> db_rwlock , RW_WRITER );
46084626 }
4609- rw_enter (& parent_db -> db_rwlock , RW_WRITER );
46104627 * bp_orig = * bp ;
46114628 rw_exit (& parent_db -> db_rwlock );
4629+ mutex_exit (& parent_db -> db_mtx );
46124630}
46134631
46144632static void
@@ -4640,6 +4658,7 @@ noinline static void
46404658dbuf_sync_lightweight (dbuf_dirty_record_t * dr , dmu_tx_t * tx )
46414659{
46424660 dnode_t * dn = dr -> dr_dnode ;
4661+ dmu_buf_impl_t * parent_db = NULL ;
46434662 zio_t * pio ;
46444663 if (dn -> dn_phys -> dn_nlevels == 1 ) {
46454664 pio = dn -> dn_zio ;
@@ -4658,6 +4677,11 @@ dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
46584677 * See comment in dbuf_write(). This is so that zio->io_bp_orig
46594678 * will have the old BP in dbuf_lightweight_done().
46604679 */
4680+ if (dr -> dr_dnode -> dn_phys -> dn_nlevels != 1 ) {
4681+ parent_db = dr -> dr_parent -> dr_dbuf ;
4682+ mutex_enter (& parent_db -> db_mtx );
4683+ rw_enter (& parent_db -> db_rwlock , RW_READER );
4684+ }
46614685 dr -> dr_bp_copy = * dbuf_lightweight_bp (dr );
46624686
46634687 dr -> dr_zio = zio_write (pio , dmu_objset_spa (dn -> dn_objset ),
@@ -4667,6 +4691,11 @@ dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
46674691 dbuf_lightweight_done , dr , ZIO_PRIORITY_ASYNC_WRITE ,
46684692 ZIO_FLAG_MUSTSUCCEED | dr -> dt .dll .dr_flags , & zb );
46694693
4694+ if (parent_db ) {
4695+ rw_exit (& parent_db -> db_rwlock );
4696+ mutex_exit (& parent_db -> db_mtx );
4697+ }
4698+
46704699 zio_nowait (dr -> dr_zio );
46714700}
46724701
@@ -4823,7 +4852,9 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
48234852 } else {
48244853 * datap = arc_alloc_buf (os -> os_spa , db , type , psize );
48254854 }
4855+ rw_enter (& db -> db_rwlock , RW_READER );
48264856 memcpy ((* datap )-> b_data , db -> db .db_data , psize );
4857+ rw_exit (& db -> db_rwlock );
48274858 }
48284859 db -> db_data_pending = dr ;
48294860
@@ -4929,6 +4960,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49294960
49304961 if (dn -> dn_type == DMU_OT_DNODE ) {
49314962 i = 0 ;
4963+ rw_enter (& db -> db_rwlock , RW_READER );
49324964 while (i < db -> db .db_size ) {
49334965 dnode_phys_t * dnp =
49344966 (void * )(((char * )db -> db .db_data ) + i );
@@ -4954,6 +4986,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49544986 DNODE_MIN_SIZE ;
49554987 }
49564988 }
4989+ rw_exit (& db -> db_rwlock );
49574990 } else {
49584991 if (BP_IS_HOLE (bp )) {
49594992 fill = 0 ;
@@ -4962,6 +4995,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49624995 }
49634996 }
49644997 } else {
4998+ rw_enter (& db -> db_rwlock , RW_READER );
49654999 blkptr_t * ibp = db -> db .db_data ;
49665000 ASSERT3U (db -> db .db_size , = = , 1 <<dn -> dn_phys -> dn_indblkshift );
49675001 for (i = db -> db .db_size >> SPA_BLKPTRSHIFT ; i > 0 ; i -- , ibp ++ ) {
@@ -4971,6 +5005,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49715005 BLK_CONFIG_SKIP , BLK_VERIFY_HALT );
49725006 fill += BP_GET_FILL (ibp );
49735007 }
5008+ rw_exit (& db -> db_rwlock );
49745009 }
49755010 DB_DNODE_EXIT (db );
49765011
@@ -5005,6 +5040,8 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
50055040 DB_DNODE_EXIT (db );
50065041 ASSERT3U (epbs , < , 31 );
50075042
5043+ mutex_enter (& db -> db_mtx );
5044+ rw_enter (& db -> db_rwlock , RW_READER );
50085045 /* Determine if all our children are holes */
50095046 for (i = 0 , bp = db -> db .db_data ; i < 1ULL << epbs ; i ++ , bp ++ ) {
50105047 if (!BP_IS_HOLE (bp ))
@@ -5021,10 +5058,14 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
50215058 * anybody from reading the blocks we're about to
50225059 * zero out.
50235060 */
5024- rw_enter (& db -> db_rwlock , RW_WRITER );
5061+ if (!rw_tryupgrade (& db -> db_rwlock )) {
5062+ rw_exit (& db -> db_rwlock );
5063+ rw_enter (& db -> db_rwlock , RW_WRITER );
5064+ }
50255065 memset (db -> db .db_data , 0 , db -> db .db_size );
5026- rw_exit (& db -> db_rwlock );
50275066 }
5067+ rw_exit (& db -> db_rwlock );
5068+ mutex_exit (& db -> db_mtx );
50285069}
50295070
50305071static void
@@ -5220,11 +5261,11 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
52205261 * avoid lock contention, only grab it when we are actually
52215262 * changing the BP.
52225263 */
5223- if (rw != NULL )
5264+ if (rw != NULL && !rw_tryupgrade (rw )) {
5265+ rw_exit (rw );
52245266 rw_enter (rw , RW_WRITER );
5267+ }
52255268 * bp = bp_copy ;
5226- if (rw != NULL )
5227- rw_exit (rw );
52285269 }
52295270}
52305271
@@ -5240,6 +5281,8 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
52405281 if (!spa_feature_is_active (spa , SPA_FEATURE_DEVICE_REMOVAL ))
52415282 return ;
52425283
5284+ mutex_enter (& db -> db_mtx );
5285+ rw_enter (& db -> db_rwlock , RW_READER );
52435286 if (db -> db_level > 0 ) {
52445287 blkptr_t * bp = db -> db .db_data ;
52455288 for (int i = 0 ; i < db -> db .db_size >> SPA_BLKPTRSHIFT ; i ++ ) {
@@ -5258,6 +5301,8 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
52585301 }
52595302 }
52605303 }
5304+ rw_exit (& db -> db_rwlock );
5305+ mutex_exit (& db -> db_mtx );
52615306}
52625307
52635308
0 commit comments