@@ -2496,34 +2496,32 @@ dnode_diduse_space(dnode_t *dn, int64_t delta)
24962496}
24972497
24982498/*
2499- * Scans a block at the indicated "level" looking for a hole or data,
2500- * depending on 'flags'.
2499+ * Scans the block at the indicated "level" looking for a hole or data,
2500+ * depending on 'flags', starting from the array position given by *index.
25012501 *
2502- * If level > 0, then we are scanning an indirect block looking at its
2503- * pointers. If level == 0, then we are looking at a block of dnodes.
2502+ * If lvl > 0, then we are scanning an indirect block looking at its
2503+ * pointers. If lvl == 0, then we are looking at a block of dnodes.
25042504 *
25052505 * If we don't find what we are looking for in the block, we return ESRCH.
2506- * Otherwise, return with *offset pointing to the beginning (if searching
2507- * forwards) or end (if searching backwards) of the range covered by the
2508- * block pointer we matched on (or dnode).
2506+ * Otherwise, return with *index set to the matching array position.
25092507 *
2510- * The basic search algorithm used below by dnode_next_offset() is to
2511- * use this function to search up the block tree (widen the search) until
2512- * we find something (i.e., we don't return ESRCH) and then search back
2513- * down the tree (narrow the search) until we reach our original search
2514- * level.
2508+ * The basic search algorithm used below by dnode_next_offset() uses this
2509+ * function to perform a block-order tree traversal. We search up the block
2510+ * tree (widen the search) until we find something (i.e., we don't return
2511+ * ESRCH) and then search back down the tree (narrow the search) until we
2512+ * reach our original search level or backtrack up because nothing matches.
25152513 */
25162514static int
2517- dnode_next_offset_level (dnode_t * dn , int flags , uint64_t * offset ,
2518- int lvl , uint64_t blkfill , uint64_t txg )
2515+ dnode_next_offset_level (dnode_t * dn , int flags , int lvl , uint64_t blkid ,
2516+ int * index , uint64_t blkfill , uint64_t txg )
25192517{
25202518 dmu_buf_impl_t * db = NULL ;
25212519 void * data = NULL ;
25222520 uint64_t epbs = dn -> dn_phys -> dn_indblkshift - SPA_BLKPTRSHIFT ;
25232521 uint64_t epb = 1ULL << epbs ;
25242522 uint64_t minfill , maxfill ;
25252523 boolean_t hole ;
2526- int i , inc , error , span ;
2524+ int i = * index , inc , error ;
25272525
25282526 ASSERT (RW_LOCK_HELD (& dn -> dn_struct_rwlock ));
25292527
@@ -2541,20 +2539,13 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
25412539 rrw_enter (& dmu_objset_ds (dn -> dn_objset )-> ds_bp_rwlock ,
25422540 RW_READER , FTAG );
25432541 } else {
2544- uint64_t blkid = dbuf_whichblock (dn , lvl , * offset );
25452542 error = dbuf_hold_impl (dn , lvl , blkid , TRUE, FALSE, FTAG , & db );
25462543 if (error ) {
25472544 if (error != ENOENT )
25482545 return (error );
25492546 if (hole )
25502547 return (0 );
2551- /*
2552- * This can only happen when we are searching up
2553- * the block tree for data. We don't really need to
2554- * adjust the offset, as we will just end up looking
2555- * at the pointer to this block in its parent, and its
2556- * going to be unallocated, so we will skip over it.
2557- */
2548+ /* Unallocated; see comment in dnode_next_offset. */
25582549 return (SET_ERROR (ESRCH ));
25592550 }
25602551 error = dbuf_read (db , NULL ,
@@ -2582,21 +2573,15 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
25822573 ASSERT (dn -> dn_type == DMU_OT_DNODE );
25832574 ASSERT (!(flags & DNODE_FIND_BACKWARDS ));
25842575
2585- for (i = (* offset >> DNODE_SHIFT ) & (blkfill - 1 );
2586- i < blkfill ; i += dnp [i ].dn_extra_slots + 1 ) {
2576+ for (; i < blkfill ; i += dnp [i ].dn_extra_slots + 1 ) {
25872577 if ((dnp [i ].dn_type == DMU_OT_NONE ) == hole )
25882578 break ;
25892579 }
25902580
2591- if (i == blkfill )
2581+ if (i >= blkfill )
25922582 error = SET_ERROR (ESRCH );
2593-
2594- * offset = (* offset & ~(DNODE_BLOCK_SIZE - 1 )) +
2595- (i << DNODE_SHIFT );
25962583 } else {
25972584 blkptr_t * bp = data ;
2598- uint64_t start = * offset ;
2599- span = (lvl - 1 ) * epbs + dn -> dn_datablkshift ;
26002585 minfill = 0 ;
26012586 maxfill = blkfill << ((lvl - 1 ) * epbs );
26022587
@@ -2605,38 +2590,13 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
26052590 else
26062591 minfill ++ ;
26072592
2608- if (span >= 8 * sizeof (* offset )) {
2609- /* This only happens on the highest indirection level */
2610- ASSERT3U ((lvl - 1 ), == , dn -> dn_phys -> dn_nlevels - 1 );
2611- * offset = 0 ;
2612- } else {
2613- * offset = * offset >> span ;
2614- }
2615-
2616- for (i = BF64_GET (* offset , 0 , epbs );
2617- i >= 0 && i < epb ; i += inc ) {
2593+ for (; i >= 0 && i < epb ; i += inc ) {
26182594 if (BP_GET_FILL (& bp [i ]) >= minfill &&
26192595 BP_GET_FILL (& bp [i ]) <= maxfill &&
26202596 (hole || BP_GET_LOGICAL_BIRTH (& bp [i ]) > txg ))
26212597 break ;
2622- if (inc > 0 || * offset > 0 )
2623- * offset += inc ;
26242598 }
26252599
2626- if (span >= 8 * sizeof (* offset )) {
2627- * offset = start ;
2628- } else {
2629- * offset = * offset << span ;
2630- }
2631-
2632- if (inc < 0 ) {
2633- /* traversing backwards; position offset at the end */
2634- if (span < 8 * sizeof (* offset ))
2635- * offset = MIN (* offset + (1ULL << span ) - 1 ,
2636- start );
2637- } else if (* offset < start ) {
2638- * offset = start ;
2639- }
26402600 if (i < 0 || i >= epb )
26412601 error = SET_ERROR (ESRCH );
26422602 }
@@ -2652,35 +2612,10 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
26522612 FTAG );
26532613 }
26542614
2615+ * index = i ;
26552616 return (error );
26562617}
26572618
2658- /*
2659- * Adjust *offset to the next (or previous) block byte offset at lvl.
2660- * Returns FALSE if *offset would overflow or underflow.
2661- */
2662- static boolean_t
2663- dnode_next_block (dnode_t * dn , int flags , uint64_t * offset , int lvl )
2664- {
2665- int epbs = dn -> dn_indblkshift - SPA_BLKPTRSHIFT ;
2666- int span = lvl * epbs + dn -> dn_datablkshift ;
2667- uint64_t blkid , maxblkid ;
2668-
2669- if (span >= 8 * sizeof (uint64_t ))
2670- return (B_FALSE );
2671-
2672- blkid = * offset >> span ;
2673- maxblkid = 1ULL << (8 * sizeof (* offset ) - span );
2674- if (!(flags & DNODE_FIND_BACKWARDS ) && blkid + 1 < maxblkid )
2675- * offset = (blkid + 1 ) << span ;
2676- else if ((flags & DNODE_FIND_BACKWARDS ) && blkid > 0 )
2677- * offset = (blkid << span ) - 1 ;
2678- else
2679- return (B_FALSE );
2680-
2681- return (B_TRUE );
2682- }
2683-
26842619/*
26852620 * Find the next hole, data, or sparse region at or after *offset.
26862621 * The value 'blkfill' tells us how many items we expect to find
@@ -2708,9 +2643,11 @@ int
27082643dnode_next_offset (dnode_t * dn , int flags , uint64_t * offset ,
27092644 int minlvl , uint64_t blkfill , uint64_t txg )
27102645{
2711- uint64_t matched = * offset ;
2646+ uint64_t blkid ;
2647+ int index ;
27122648 int lvl , maxlvl ;
27132649 int error = 0 ;
2650+ int epbs = dn -> dn_indblkshift - SPA_BLKPTRSHIFT ;
27142651
27152652 if (!(flags & DNODE_FIND_HAVELOCK ))
27162653 rw_enter (& dn -> dn_struct_rwlock , RW_READER );
@@ -2730,18 +2667,29 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset,
27302667 goto out ;
27312668 }
27322669
2670+ if (minlvl > 0 ) {
2671+ uint64_t n = dbuf_whichblock (dn , minlvl - 1 , * offset );
2672+ blkid = n >> epbs ;
2673+ index = BF64_GET (n , 0 , epbs );
2674+ } else {
2675+ blkid = dbuf_whichblock (dn , 0 , * offset );
2676+ index = (* offset >> DNODE_SHIFT ) & (blkfill - 1 );
2677+ }
2678+
27332679 maxlvl = dn -> dn_phys -> dn_nlevels ;
27342680
27352681 for (lvl = minlvl ; lvl <= maxlvl ; ) {
27362682 error = dnode_next_offset_level (dn ,
2737- flags , offset , lvl , blkfill , txg );
2683+ flags , lvl , blkid , & index , blkfill , txg );
27382684 if (error == 0 && lvl > minlvl ) {
2685+ /* Continue search at matched block in lvl-1. */
2686+ blkid = (blkid << epbs ) + index ;
2687+ index = 0 ;
27392688 -- lvl ;
2740- matched = * offset ;
2741- } else if (error == ESRCH && lvl < maxlvl &&
2742- dnode_next_block (dn , flags , & matched , lvl )) {
2689+ } else if (error == ESRCH && lvl < maxlvl ) {
27432690 /*
2744- * Continue search at next/prev offset in lvl+1 block.
2691+ * Continue search at next/prev offset in lvl+1 block
2692+ * but stop if blkid would underflow or overflow.
27452693 *
27462694 * Usually we only search upwards at the start of the
27472695 * search as higher level blocks point at a matching
@@ -2751,14 +2699,19 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset,
27512699 * contains only BPs/dnodes freed at that txg. It also
27522700 * happens if we are still syncing out the tree, and
27532701 * some BP's at higher levels are not updated yet.
2754- *
2755- * We must adjust offset to avoid coming back to the
2756- * same offset and getting stuck looping forever. This
2757- * also deals with the case where offset is already at
2758- * the beginning or end of the object.
27592702 */
2703+ if (flags & DNODE_FIND_BACKWARDS ) {
2704+ if (blkid == 0 )
2705+ break ;
2706+ -- blkid ;
2707+ } else {
2708+ if (blkid == UINT64_MAX )
2709+ break ;
2710+ ++ blkid ;
2711+ }
2712+ index = BF64_GET (blkid , 0 , epbs );
2713+ blkid = blkid >> epbs ;
27602714 ++ lvl ;
2761- * offset = matched ;
27622715 } else {
27632716 break ;
27642717 }
@@ -2773,6 +2726,24 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset,
27732726 error = 0 ;
27742727 }
27752728
2729+ if (lvl > 0 ) {
2730+ uint64_t n = blkid << epbs ;
2731+ if (index > 0 || n > 0 )
2732+ n += index ; /* -1 <= index <= 1<<epbs */
2733+
2734+ int span = (lvl - 1 ) * epbs + dn -> dn_datablkshift ;
2735+ if (span >= 8 * sizeof (uint64_t ))
2736+ * offset = 0 ;
2737+ else if (flags & DNODE_FIND_BACKWARDS )
2738+ /* traversing backwards; position at block end */
2739+ * offset = MIN (* offset , ((n + 1 ) << span ) - 1 );
2740+ else
2741+ * offset = MAX (* offset , n << span );
2742+ } else {
2743+ * offset = (blkid << dn -> dn_datablkshift ) +
2744+ (index << DNODE_SHIFT ); /* 0 <= index <= blkfill */
2745+ }
2746+
27762747out :
27772748 if (!(flags & DNODE_FIND_HAVELOCK ))
27782749 rw_exit (& dn -> dn_struct_rwlock );
0 commit comments