@@ -2340,6 +2340,15 @@ vdev_sit_out_reads(vdev_t *vd, zio_flag_t io_flags)
2340
2340
/* Avoid skipping a data column read when scrubbing */
2341
2341
if (io_flags & ZIO_FLAG_SCRUB )
2342
2342
return (B_FALSE );
2343
+
2344
+ if (!vd -> vdev_ops -> vdev_op_leaf ) {
2345
+ boolean_t sitting = B_FALSE ;
2346
+ for (int c = 0 ; c < vd -> vdev_children ; c ++ ) {
2347
+ sitting |= vdev_sit_out_reads (vd -> vdev_child [c ],
2348
+ io_flags );
2349
+ }
2350
+ return (sitting );
2351
+ }
2343
2352
if (vd -> vdev_read_sit_out_expire >= gethrestime_sec ())
2344
2353
return (B_TRUE );
2345
2354
vd -> vdev_read_sit_out_expire = 0 ;
@@ -2904,6 +2913,13 @@ latency_compare(const void *arg1, const void *arg2)
2904
2913
void
2905
2914
vdev_raidz_sit_child (vdev_t * svd )
2906
2915
{
2916
+ for (int c = 0 ; c < svd -> vdev_children ; c ++ ) {
2917
+ vdev_raidz_sit_child (svd -> vdev_child [c ]);
2918
+ }
2919
+
2920
+ if (!svd -> vdev_ops -> vdev_op_leaf )
2921
+ return ;
2922
+
2907
2923
/*
2908
2924
* Begin a sit out period for this slow drive
2909
2925
*/
@@ -2915,6 +2931,19 @@ vdev_raidz_sit_child(vdev_t *svd)
2915
2931
mutex_exit (& svd -> vdev_stat_lock );
2916
2932
}
2917
2933
2934
+ void
2935
+ vdev_raidz_unsit_child (vdev_t * vd )
2936
+ {
2937
+ for (int c = 0 ; c < vd -> vdev_children ; c ++ ) {
2938
+ vdev_raidz_sit_child (vd -> vdev_child [c ]);
2939
+ }
2940
+
2941
+ if (!vd -> vdev_ops -> vdev_op_leaf )
2942
+ return ;
2943
+
2944
+ vd -> vdev_read_sit_out_expire = 0 ;
2945
+ }
2946
+
2918
2947
/*
2919
2948
* Check for any latency outlier from latest set of child reads.
2920
2949
*
@@ -2943,7 +2972,7 @@ vdev_child_slow_outlier(zio_t *zio)
2943
2972
vd -> vdev_children < LAT_SAMPLES_MIN )
2944
2973
return ;
2945
2974
2946
- hrtime_t now = gethrtime ();
2975
+ hrtime_t now = getlrtime ();
2947
2976
uint64_t last = atomic_load_64 (& vd -> vdev_last_latency_check );
2948
2977
2949
2978
if ((now - last ) < MSEC2NSEC (vdev_raidz_outlier_check_interval_ms ) ||
@@ -2976,64 +3005,64 @@ vdev_child_slow_outlier(zio_t *zio)
2976
3005
uint64_t max = 0 ;
2977
3006
vdev_t * svd = NULL ; /* suspect vdev */
2978
3007
uint_t sitouts = 0 ;
3008
+ boolean_t skip = B_FALSE , svd_sitting = B_FALSE ;
2979
3009
for (int c = 0 ; c < samples ; c ++ ) {
2980
3010
vdev_t * cvd = vd -> vdev_child [c ];
3011
+ boolean_t sitting = vdev_sit_out_reads (cvd , 0 );
2981
3012
2982
- if (cvd -> vdev_read_sit_out_expire != 0 ) {
2983
- if (cvd -> vdev_read_sit_out_expire < gethrestime_sec ()) {
2984
- /*
2985
- * Done with our sit out, wait for new outlier
2986
- * to emerge.
2987
- */
2988
- cvd -> vdev_read_sit_out_expire = 0 ;
2989
- } else if (sitouts ++ >= vdev_get_nparity (vd )) {
2990
- /*
2991
- * We can't sit out more disks than we have
2992
- * parity
2993
- */
2994
- goto out ;
2995
- }
3013
+ if (sitting && sitouts ++ >= vdev_get_nparity (vd )) {
3014
+ /*
3015
+ * We can't sit out more disks than we have
3016
+ * parity
3017
+ */
3018
+ skip = B_TRUE ;
2996
3019
}
2997
3020
mutex_enter (& cvd -> vdev_stat_lock );
3021
+
3022
+ uint64_t * prev_histo = cvd -> vdev_prev_histo ;
2998
3023
uint64_t * histo =
2999
3024
cvd -> vdev_stat_ex .vsx_disk_histo [ZIO_TYPE_READ ];
3000
- uint64_t * prev_histo = cvd -> vdev_prev_histo ;
3025
+ if (skip ) {
3026
+ size_t size =
3027
+ sizeof (cvd -> vdev_stat_ex .vsx_disk_histo [0 ]);
3028
+ memcpy (prev_histo , histo , size );
3029
+ mutex_exit (& cvd -> vdev_stat_lock );
3030
+ continue ;
3031
+ }
3001
3032
uint64_t count = 0 ;
3002
3033
lat_data [c ] = 0 ;
3003
3034
for (int i = 0 ; i < VDEV_L_HISTO_BUCKETS ; i ++ ) {
3004
3035
uint64_t this_count = histo [i ] - prev_histo [i ];
3005
3036
lat_data [c ] += (1ULL << i ) * this_count ;
3006
3037
count += this_count ;
3007
3038
}
3039
+ size_t size = sizeof (cvd -> vdev_stat_ex .vsx_disk_histo [0 ]);
3040
+ memcpy (prev_histo , histo , size );
3008
3041
mutex_exit (& cvd -> vdev_stat_lock );
3009
3042
lat_data [c ] /= MAX (1 , count );
3010
3043
3011
3044
/* wait until all disks have been read from */
3012
- if (lat_data [c ] == 0 && cvd -> vdev_read_sit_out_expire == 0 )
3013
- goto out ;
3045
+ if (lat_data [c ] == 0 && !sitting ) {
3046
+ skip = B_TRUE ;
3047
+ continue ;
3048
+ }
3014
3049
3015
3050
/* keep track of the vdev with largest value */
3016
3051
if (lat_data [c ] > max ) {
3017
3052
max = lat_data [c ];
3018
3053
svd = cvd ;
3054
+ svd_sitting = sitting ;
3019
3055
}
3020
3056
}
3021
-
3022
- for (int c = 0 ; c < samples ; c ++ ) {
3023
- vdev_t * cvd = vd -> vdev_child [c ];
3024
- mutex_enter (& cvd -> vdev_stat_lock );
3025
- size_t size = sizeof (cvd -> vdev_stat_ex .vsx_disk_histo [0 ]);
3026
- memcpy (cvd -> vdev_prev_histo ,
3027
- cvd -> vdev_stat_ex .vsx_disk_histo [ZIO_TYPE_READ ], size );
3028
- mutex_exit (& cvd -> vdev_stat_lock );
3029
- }
3057
+ if (skip )
3058
+ goto out ;
3030
3059
3031
3060
qsort ((void * )lat_data , samples , sizeof (uint64_t ), latency_compare );
3032
3061
uint64_t iqr = 0 ;
3033
3062
uint64_t fence = latency_quartiles_fence (lat_data , samples , & iqr );
3034
3063
3035
3064
ASSERT3U (lat_data [samples - 1 ], = = , max );
3036
- if (max > fence && svd -> vdev_read_sit_out_expire == 0 ) {
3065
+ if (max > fence && ! svd_sitting ) {
3037
3066
uint64_t incr = MAX (1 , (max - fence ) / iqr );
3038
3067
vd -> vdev_outlier_count += incr ;
3039
3068
if (vd -> vdev_outlier_count >= samples ) {
0 commit comments