From 4b899738894c1b5dde7798cdf3eca10489ae2d29 Mon Sep 17 00:00:00 2001 From: Huginn Date: Sun, 7 Sep 2025 17:21:37 +0800 Subject: [PATCH 1/3] HBASE-29574 Resolve the issue that splitting HFiles results in write failures due to storage policy during bulkload --- .../org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java index 4d6f57e22edc..abc4ae1be12a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java @@ -813,6 +813,11 @@ private static void copyHFileHalf(Configuration conf, Path inFile, Path outFile, storeFileInfo.getHFileInfo().initMetaAndIndex(halfReader.getHFileReader()); Map fileInfo = halfReader.loadFileInfo(); + if (familyDescriptor.getStoragePolicy() != null) { + conf = new Configuration(conf); + conf.set(ColumnFamilyDescriptorBuilder.STORAGE_POLICY, familyDescriptor.getStoragePolicy()); + } + int blocksize = familyDescriptor.getBlocksize(); Algorithm compression = familyDescriptor.getCompressionType(); BloomType bloomFilterType = familyDescriptor.getBloomFilterType(); From 7a7fc49cd38d9cbec746af76d3f26ed515b6d794 Mon Sep 17 00:00:00 2001 From: Huginn Date: Sun, 21 Sep 2025 17:48:01 +0800 Subject: [PATCH 2/3] add ut --- .../apache/hadoop/hbase/HBaseTestingUtil.java | 36 +++++++++++++++ .../hadoop/hbase/tool/TestBulkLoadHFiles.java | 46 +++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtil.java index 93bde45a9d57..411fe8f0336d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtil.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtil.java @@ -1374,6 +1374,42 @@ public Table createTable(TableDescriptor htd, byte[][] families, byte[][] splitK return getConnection().getTable(td.getTableName()); } + /** + * Create a table. + * @param htd table descriptor + * @param families array of column families + * @param splitKeys array of split keys + * @param type Bloom type + * @param blockSize block size + * @param storagePolicy storage policy + * @param c Configuration to use + * @return A Table instance for the created table. + * @throws IOException if getAdmin or createTable fails + */ + + public Table createTable(TableDescriptor htd, byte[][] families, byte[][] splitKeys, + BloomType type, int blockSize, String storagePolicy, Configuration c) throws IOException { + TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(htd); + for (byte[] family : families) { + ColumnFamilyDescriptorBuilder cfdb = ColumnFamilyDescriptorBuilder.newBuilder(family) + .setBloomFilterType(type).setBlocksize(blockSize).setStoragePolicy(storagePolicy); + if (isNewVersionBehaviorEnabled()) { + cfdb.setNewVersionBehavior(true); + } + builder.setColumnFamily(cfdb.build()); + } + TableDescriptor td = builder.build(); + if (splitKeys != null) { + getAdmin().createTable(td, splitKeys); + } else { + getAdmin().createTable(td); + } + // HBaseAdmin only waits for regions to appear in hbase:meta + // we should wait until they are assigned + waitUntilAllRegionsAssigned(td.getTableName()); + return getConnection().getTable(td.getTableName()); + } + /** * Create a table. * @param htd table descriptor diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java index 40b5ef440b2b..d62ff24cd4cb 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java @@ -22,6 +22,7 @@ import static org.hamcrest.Matchers.greaterThan; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -934,4 +935,49 @@ public void testFailIfNeedSplitHFile() throws IOException { util.getHBaseCluster().getRegions(tableName) .forEach(r -> assertEquals(1, r.getStore(FAMILY).getStorefiles().size())); } + + private String getStoragePolicyOfTmpDirWhenSplitHFile(Table table, String family) throws Exception { + TableName tableName = table.getName(); + util.loadTable(table, Bytes.toBytes(family)); + + FileSystem fs = util.getTestFileSystem(); + Path sfPath = new Path(fs.getWorkingDirectory(), new Path(family, "file")); + HFileTestUtil.createHFile(util.getConfiguration(), fs, sfPath, Bytes.toBytes(family), QUALIFIER, + Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000); + + util.getAdmin().split(tableName); + util.waitFor(10000, 1000, () -> util.getAdmin().getRegions(tableName).size() > 1); + + Path tmp = new Path(fs.getWorkingDirectory(), new Path(family, ".tmp")); + Configuration config = new Configuration(util.getConfiguration()); + BulkLoadHFilesTool tool = new BulkLoadHFilesTool(config); + String[] args = new String[] { fs.getWorkingDirectory().toString(), tableName.toString() }; + tool.run(args); + return fs.exists(tmp) ? fs.getStoragePolicy(tmp).getName() : null; + } + + @Test + public void testSplitHFileInSSDCluster() throws Exception { + TableName table = TableName.valueOf(tn.getMethodName()); + String family = "cf"; + String storage = getStoragePolicyOfTmpDirWhenSplitHFile( + util.createTable(table, Bytes.toBytes(family)), family); + assertNotEquals("ALL_SSD", storage); + assertEquals("HOT", storage); + + TableName tableSetSSDPolicy = TableName.valueOf(tn.getMethodName() + "_SSD"); + String familySsd = "cf_ssd"; + TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableSetSSDPolicy) + .setRegionReplication(1) + .build(); + assertEquals("ALL_SSD", getStoragePolicyOfTmpDirWhenSplitHFile( + util.createTable(htd, + new byte[][]{Bytes.toBytes(familySsd)}, + null, + BloomType.NONE, + HConstants.DEFAULT_BLOCKSIZE, + "ALL_SSD", + null), + familySsd)); + } } From c183bb542b5fc5e65aba8b3a035a0f2f91f65a90 Mon Sep 17 00:00:00 2001 From: Huginn Date: Tue, 23 Sep 2025 18:12:59 +0800 Subject: [PATCH 3/3] spotless check --- .../hadoop/hbase/tool/TestBulkLoadHFiles.java | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java index d62ff24cd4cb..ff7e6fe53650 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java @@ -936,7 +936,8 @@ public void testFailIfNeedSplitHFile() throws IOException { .forEach(r -> assertEquals(1, r.getStore(FAMILY).getStorefiles().size())); } - private String getStoragePolicyOfTmpDirWhenSplitHFile(Table table, String family) throws Exception { + private String getStoragePolicyOfTmpDirWhenSplitHFile(Table table, String family) + throws Exception { TableName tableName = table.getName(); util.loadTable(table, Bytes.toBytes(family)); @@ -961,23 +962,18 @@ public void testSplitHFileInSSDCluster() throws Exception { TableName table = TableName.valueOf(tn.getMethodName()); String family = "cf"; String storage = getStoragePolicyOfTmpDirWhenSplitHFile( - util.createTable(table, Bytes.toBytes(family)), family); + util.createTable(table, Bytes.toBytes(family)), family); assertNotEquals("ALL_SSD", storage); assertEquals("HOT", storage); TableName tableSetSSDPolicy = TableName.valueOf(tn.getMethodName() + "_SSD"); String familySsd = "cf_ssd"; - TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableSetSSDPolicy) - .setRegionReplication(1) - .build(); - assertEquals("ALL_SSD", getStoragePolicyOfTmpDirWhenSplitHFile( - util.createTable(htd, - new byte[][]{Bytes.toBytes(familySsd)}, - null, - BloomType.NONE, - HConstants.DEFAULT_BLOCKSIZE, - "ALL_SSD", - null), - familySsd)); + TableDescriptor htd = + TableDescriptorBuilder.newBuilder(tableSetSSDPolicy).setRegionReplication(1).build(); + assertEquals("ALL_SSD", + getStoragePolicyOfTmpDirWhenSplitHFile( + util.createTable(htd, new byte[][] { Bytes.toBytes(familySsd) }, null, BloomType.NONE, + HConstants.DEFAULT_BLOCKSIZE, "ALL_SSD", null), + familySsd)); } }