diff --git a/cmd/zpool/Makefile.am b/cmd/zpool/Makefile.am index b9e221c1f7cc..a7fdcdb58e4b 100644 --- a/cmd/zpool/Makefile.am +++ b/cmd/zpool/Makefile.am @@ -18,7 +18,8 @@ endif zpool_LDADD = \ $(top_builddir)/lib/libnvpair/libnvpair.la \ $(top_builddir)/lib/libuutil/libuutil.la \ - $(top_builddir)/lib/libzfs/libzfs.la + $(top_builddir)/lib/libzfs/libzfs.la \ + $(top_builddir)/lib/libzpool/libzpool.la zpool_LDADD += -lm $(LIBBLKID) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index f7696caac515..157a69c0efa0 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -64,6 +64,7 @@ #include #include #include +#include #include @@ -1852,8 +1853,8 @@ typedef struct status_cbdata { int cb_count; int cb_name_flags; int cb_namewidth; + unsigned int cb_verbose; boolean_t cb_allpools; - boolean_t cb_verbose; boolean_t cb_literal; boolean_t cb_explain; boolean_t cb_first; @@ -7400,7 +7401,18 @@ print_checkpoint_status(pool_checkpoint_stat_t *pcs) } static void -print_error_log(zpool_handle_t *zhp) +print_error_log_range_tree_cb(void *arg, uint64_t start, uint64_t size) +{ + char str[32]; + + zfs_nicenum(size, str, sizeof (str)); + + printf("%11s[0x%llx-0x%llx] (%s)\n", "", (u_longlong_t)start, + (u_longlong_t)(start + size - 1), str); +} + +static void +print_error_log(zpool_handle_t *zhp, unsigned int verbose) { nvlist_t *nverrlist = NULL; nvpair_t *elem; @@ -7410,23 +7422,88 @@ print_error_log(zpool_handle_t *zhp) if (zpool_get_errlog(zhp, &nverrlist) != 0) return; - (void) printf("errors: Permanent errors have been " - "detected in the following files:\n\n"); + printf_color(ANSI_RED, "errors: Permanent errors have been " + "detected in the following files:"); + printf("\n\n"); pathname = safe_malloc(len); elem = NULL; while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) { nvlist_t *nv; uint64_t dsobj, obj; + uint64_t data_block_size = 0, indirect_block_size = 0; + uint64_t *block_ids; + int64_t *indrt_levels; + unsigned int 
error_count; + int rc = 0; verify(nvpair_value_nvlist(elem, &nv) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET, &dsobj) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT, &obj) == 0); + verify(nvlist_lookup_int64_array(nv, ZPOOL_ERR_LEVEL, + &indrt_levels, &error_count) == 0); + verify(nvlist_lookup_uint64_array(nv, ZPOOL_ERR_BLOCKID, + &block_ids, &error_count) == 0); + zpool_obj_to_path(zhp, dsobj, obj, pathname, len); - (void) printf("%7s %s\n", "", pathname); + + if (error_count > 0) { + rc = zpool_get_block_size(zhp, dsobj, obj, + &data_block_size, &indirect_block_size); + } + if (rc == 0) { + char str[32]; + zfs_nicenum(data_block_size, str, sizeof (str)); + + (void) printf("%7s %s: found %u corrupted %s %s\n", + "", pathname, error_count, str, + error_count == 1 ? "block" : "blocks"); + + if (verbose > 1) { + range_tree_t *range_tree; + zfs_btree_init(); + range_tree = range_tree_create(NULL, + RANGE_SEG64, NULL, 0, 0); + if (range_tree == NULL) { + zfs_btree_fini(); + goto fail; + } + + /* Add all our blocks to the range tree */ + for (unsigned int i = 0; i < error_count; i++) { + /* + * NOTE(review): both shifts are 0, + * so indrt_levels[i] is ignored. + */ + uint8_t blkptr_size_shift = 0; + uint8_t indirect_block_shift = 0; + uint64_t offset_blks = block_ids[i] << + ((indirect_block_shift - + blkptr_size_shift) * + indrt_levels[i]); + + range_tree_add(range_tree, + offset_blks * data_block_size, + data_block_size); + } + + /* Print out our ranges */ + range_tree_walk(range_tree, + print_error_log_range_tree_cb, NULL); + + printf("\n"); + range_tree_vacate(range_tree, NULL, NULL); + range_tree_destroy(range_tree); + zfs_btree_fini(); + } + } else { + (void) printf("%7s %s %s\n", "", pathname, " can not " + "determine error offset"); + } } +fail: free(pathname); nvlist_free(nverrlist); } @@ -7975,7 +8052,7 @@ status_callback(zpool_handle_t *zhp, void *data) "errors, use '-v' for a list\n"), (u_longlong_t)nerr); else - print_error_log(zhp); + print_error_log(zhp, cbp->cb_verbose); } if (cbp->cb_dedup_stats) @@ -8063,7 +8140,7 @@ zpool_do_status(int argc, char 
**argv) cb.cb_print_slow_ios = B_TRUE; break; case 'v': - cb.cb_verbose = B_TRUE; + cb.cb_verbose++; break; case 'x': cb.cb_explain = B_TRUE; diff --git a/include/libzfs.h b/include/libzfs.h index 8e9f6fb3fc1b..8818baca4037 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -440,6 +440,8 @@ extern int zpool_events_clear(libzfs_handle_t *, int *); extern int zpool_events_seek(libzfs_handle_t *, uint64_t, int); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t len); +extern int zpool_get_block_size(zpool_handle_t *, uint64_t, uint64_t, + uint64_t *, uint64_t *); extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 1474e1f049d8..d9dc45829aa3 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1359,6 +1359,8 @@ typedef enum { #define ZPOOL_ERR_LIST "error list" #define ZPOOL_ERR_DATASET "dataset" #define ZPOOL_ERR_OBJECT "object" +#define ZPOOL_ERR_LEVEL "level" +#define ZPOOL_ERR_BLOCKID "block id" #define HIS_MAX_RECORD_LEN (MAXPATHLEN + MAXPATHLEN + 1) diff --git a/include/sys/zfs_stat.h b/include/sys/zfs_stat.h index 465aefaa2063..36367a5d6956 100644 --- a/include/sys/zfs_stat.h +++ b/include/sys/zfs_stat.h @@ -44,6 +44,8 @@ typedef struct zfs_stat { uint64_t zs_mode; uint64_t zs_links; uint64_t zs_ctime[2]; + uint64_t zs_data_block_size; + uint64_t zs_indirect_block_size; } zfs_stat_t; extern int zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 7f3ec5d0d4fa..9c045cb91a9b 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3956,6 +3956,60 @@ zbookmark_mem_compare(const void *a, const void *b) return (memcmp(a, b, sizeof (zbookmark_phys_t))); } +/* + * Given a sorted array of zbookmark_phys_t's, process 
one object group's worth + * (object group = objset + object), add it to the nvlist, and return + * the number of zbookmark_phys_ts processed. 'nv' is assumed to be already + * allocated. 'count' is the number of items in the zb[] array. + */ +static uint64_t +zpool_get_errlog_process_obj_group(zpool_handle_t *zhp, zbookmark_phys_t *zb, + uint64_t count, nvlist_t *nv) +{ + uint64_t error_count; + uint64_t *block_ids = NULL; + int64_t *indrt_levels = NULL; + uint64_t i; + + if (count == 0) + return (0); + + /* First see how many zbookmarks are of the same object group */ + for (i = 0; i < count; i++) { + if (i > 0 && !(zb[i - 1].zb_objset == zb[i].zb_objset && + zb[i - 1].zb_object == zb[i].zb_object)) { + /* We've hit a new object group */ + break; + } + } + + error_count = i; + + block_ids = zfs_alloc(zhp->zpool_hdl, error_count * + sizeof (*block_ids)); + indrt_levels = zfs_alloc(zhp->zpool_hdl, error_count * + sizeof (*indrt_levels)); + + /* Write our object group's objset and object */ + VERIFY0(nvlist_add_uint64(nv, ZPOOL_ERR_DATASET, zb[0].zb_objset)); + VERIFY0(nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT, zb[0].zb_object)); + + /* Write all the error'd blocks for this group */ + for (i = 0; i < error_count; i++) { + block_ids[i] = zb[i].zb_blkid; + indrt_levels[i] = zb[i].zb_level; + } + VERIFY0(nvlist_add_uint64_array(nv, ZPOOL_ERR_BLOCKID, block_ids, + error_count)); + VERIFY0(nvlist_add_int64_array(nv, ZPOOL_ERR_LEVEL, indrt_levels, + error_count)); + + free(indrt_levels); + free(block_ids); + + return (error_count); +} + /* * Retrieve the persistent error log, uniquify the members, and return to the * caller. @@ -3968,6 +4022,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) uint64_t count; zbookmark_phys_t *zb = NULL; int i; + nvlist_t *nv; /* * Retrieve the raw error list from the kernel. 
If the number of errors @@ -4018,34 +4073,28 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0); + /* - * Fill in the nverrlistp with nvlist's of dataset and object numbers. + * zb[count] is an array of zbookmarks which point to error'd out + * blocks. We logically group these into objset + object, which + * we'll call an "object group", which is usually a file (but + * can be something else). + * + * The 'i = i' in this for() loop is to get rid of a cstyle warning: + * "comma or semicolon followed by non-blank" */ - for (i = 0; i < count; i++) { - nvlist_t *nv; - - /* ignoring zb_blkid and zb_level for now */ - if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset && - zb[i-1].zb_object == zb[i].zb_object) - continue; - + for (i = 0; i < count; i = i) { if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0) goto nomem; - if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET, - zb[i].zb_objset) != 0) { - nvlist_free(nv); - goto nomem; - } - if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT, - zb[i].zb_object) != 0) { - nvlist_free(nv); - goto nomem; - } + + i += zpool_get_errlog_process_obj_group(zhp, &zb[i], count - i, + nv); if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) { nvlist_free(nv); goto nomem; } + /* nvlist_add_nvlist() stored a copy; release ours */ nvlist_free(nv); } free((void *)(uintptr_t)zc.zc_nvlist_dst); @@ -4390,6 +4439,36 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, free(mntpnt); } +/* + * Given dataset and object numbers, return data and indirect block sizes. 
+ */ +int +zpool_get_block_size(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, + uint64_t *data_blk_size, uint64_t *indrt_blk_size) +{ + zfs_cmd_t zc = {"\0"}; + char dsname[ZFS_MAX_DATASET_NAME_LEN]; + /* get the dataset's name */ + zc.zc_obj = dsobj; + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + int error = zfs_ioctl(zhp->zpool_hdl, + ZFS_IOC_DSOBJ_TO_DSNAME, &zc); + if (error) { + return (error); + } + (void) strlcpy(dsname, zc.zc_value, sizeof (dsname)); + + /* get data block and indirect block size */ + (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); + zc.zc_obj = obj; + error = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_OBJ_TO_STATS, &zc); + if (error == 0) { + *data_blk_size = zc.zc_stat.zs_data_block_size; + *indrt_blk_size = zc.zc_stat.zs_indirect_block_size; + } + return (error); +} + /* * Wait while the specified activity is in progress in the pool. */ diff --git a/man/man8/zpool-status.8 b/man/man8/zpool-status.8 index 7364bf635706..92f38ea63b38 100644 --- a/man/man8/zpool-status.8 +++ b/man/man8/zpool-status.8 @@ -121,7 +121,8 @@ See .Xr date 1 . .It Fl v Displays verbose data error information, printing out a complete list of all -data errors since the last complete pool scrub. +data errors since the last complete pool scrub. Passing this flag twice ('-vv') +will print out the byte ranges for the errors within the files. .It Fl x Only display status for pools that are exhibiting errors or are otherwise unavailable. 
diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c index 45f19785d4ec..999ce0e49e03 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode.c @@ -2087,6 +2087,11 @@ zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, sa_bulk_attr_t bulk[4]; int count = 0; + dmu_object_info_t doi; + sa_object_info(hdl, &doi); + sb->zs_data_block_size = doi.doi_data_block_size; + sb->zs_indirect_block_size = doi.doi_metadata_block_size; + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, &sb->zs_mode, sizeof (sb->zs_mode)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index a3396ac07ffc..42e356f4d385 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -443,7 +443,7 @@ tests = ['zpool_split_cliargs', 'zpool_split_devices', tags = ['functional', 'cli_root', 'zpool_split'] [tests/functional/cli_root/zpool_status] -tests = ['zpool_status_001_pos', 'zpool_status_002_pos'] +tests = ['zpool_status_001_pos', 'zpool_status_002_pos', 'zpool_status_-v'] tags = ['functional', 'cli_root', 'zpool_status'] [tests/functional/cli_root/zpool_sync] diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am index beb59e3d066b..39e451fed945 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am @@ -3,4 +3,5 @@ dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ zpool_status_001_pos.ksh \ - zpool_status_002_pos.ksh + zpool_status_002_pos.ksh \ + zpool_status_-v.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-v.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-v.ksh new file mode 100755 index 000000000000..54233dff474f --- /dev/null +++ 
b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-v.ksh @@ -0,0 +1,64 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2019 Lawrence Livermore National Security, LLC. + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify correct output with 'zpool status -v' after corrupting a file +# +# STRATEGY: +# 1. Create a file +# 2. zinject checksum errors +# 3. Read the file +# 4. Verify we see "file corrupted" output in 'zpool status -v' +# 5. Verify we see one of the corrupted ranges in 'zpool status -vv' + +verify_runnable "both" + +log_assert "Verify correct 'zpool status -v' output with a corrupted file" +log_must mkfile 10m $TESTDIR/10m_file +log_must mkfile 1m $TESTDIR/1m_file + +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL + +log_must zinject -t data -e checksum -f 100 $TESTDIR/10m_file +log_must zinject -t data -e checksum -f 100 $TESTDIR/1m_file + +# Try to read '1m_file'. It should stop after the first 128k block. 
+cat $TESTDIR/1m_file > /dev/null || true + +# Try to read the 2nd megabyte of '10m_file' +dd if=$TESTDIR/10m_file of=/dev/null bs=1M skip=1 count=1 || true + +log_must zinject -c all + +# Look to see that both our files report errors +log_must eval "zpool status -v | grep '10m_file: found'" +log_must eval "zpool status -v | grep '1m_file: found'" + +# Look for one of our error ranges (-F: the brackets are literal text) +log_must eval "zpool status -vv | grep -F '[0x100000-0x1fffff]'" + +log_pass "'zpool status -vv' output is correct"