Skip to content

Commit ac3ea05

Browse files
authored
HIVE-28952: TableFetcher to return Table objects instead of names (#6020)
1 parent 78db8e9 commit ac3ea05

File tree

5 files changed

+57
-42
lines changed

5 files changed

+57
-42
lines changed

iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergTableOptimizer.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ public Set<CompactionInfo> findPotentialCompactions(long lastChecked, ShowCompac
8888
Set<String> skipDBs, Set<String> skipTables) {
8989
Set<CompactionInfo> compactionTargets = Sets.newHashSet();
9090

91-
getTables().stream()
91+
getTableNames().stream()
9292
.filter(table -> !skipDBs.contains(table.getDb()))
9393
.filter(table -> !skipTables.contains(table.getNotEmptyDbTable()))
9494
.map(table -> {
@@ -126,9 +126,9 @@ public Set<CompactionInfo> findPotentialCompactions(long lastChecked, ShowCompac
126126
return compactionTargets;
127127
}
128128

129-
private List<org.apache.hadoop.hive.common.TableName> getTables() {
129+
private List<org.apache.hadoop.hive.common.TableName> getTableNames() {
130130
try {
131-
return IcebergTableUtil.getTableFetcher(client, null, "*", null).getTables();
131+
return IcebergTableUtil.getTableFetcher(client, null, "*", null).getTableNames();
132132
} catch (Exception e) {
133133
throw new RuntimeMetaException(e, "Error getting table names");
134134
}

iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/metastore/task/IcebergHouseKeeperService.java

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,13 @@
2929
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
3030
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
3131
import org.apache.hadoop.hive.metastore.MetastoreTaskThread;
32-
import org.apache.hadoop.hive.metastore.api.GetTableRequest;
3332
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
3433
import org.apache.hadoop.hive.metastore.txn.NoMutex;
3534
import org.apache.hadoop.hive.metastore.txn.TxnStore;
3635
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
3736
import org.apache.iceberg.ExpireSnapshots;
3837
import org.apache.iceberg.Table;
3938
import org.apache.iceberg.mr.hive.IcebergTableUtil;
40-
import org.apache.thrift.TException;
4139
import org.slf4j.Logger;
4240
import org.slf4j.LoggerFactory;
4341

@@ -79,35 +77,21 @@ public void run() {
7977

8078
private void expireTables(String catalogName, String dbPattern, String tablePattern) {
8179
try (IMetaStoreClient msc = new HiveMetaStoreClient(conf)) {
82-
// TODO: HIVE-28952 – modify TableFetcher to return HMS Table API objects directly,
83-
// avoiding the need for subsequent msc.getTable calls to fetch each matched table individually
84-
List<TableName> tables = IcebergTableUtil.getTableFetcher(msc, catalogName, dbPattern, tablePattern).getTables();
85-
80+
int maxBatchSize = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
81+
List<org.apache.hadoop.hive.metastore.api.Table> tables =
82+
IcebergTableUtil.getTableFetcher(msc, catalogName, dbPattern, tablePattern).getTables(maxBatchSize);
8683
LOG.debug("{} candidate tables found", tables.size());
87-
88-
for (TableName table : tables) {
89-
try {
90-
expireSnapshotsForTable(getIcebergTable(table, msc));
91-
} catch (Exception e) {
92-
LOG.error("Exception while running iceberg expiry service on catalog/db/table: {}/{}/{}",
93-
catalogName, dbPattern, tablePattern, e);
94-
}
84+
for (org.apache.hadoop.hive.metastore.api.Table table : tables) {
85+
expireSnapshotsForTable(getIcebergTable(table));
9586
}
9687
} catch (Exception e) {
9788
throw new RuntimeException("Error while getting tables from metastore", e);
9889
}
9990
}
10091

101-
private Table getIcebergTable(TableName tableName, IMetaStoreClient msc) {
102-
return tableCache.get(tableName, key -> {
103-
LOG.debug("Getting iceberg table from metastore as it's not present in table cache: {}", tableName);
104-
GetTableRequest request = new GetTableRequest(tableName.getDb(), tableName.getTable());
105-
try {
106-
return IcebergTableUtil.getTable(conf, msc.getTable(request));
107-
} catch (TException e) {
108-
throw new RuntimeException(e);
109-
}
110-
});
92+
private Table getIcebergTable(org.apache.hadoop.hive.metastore.api.Table table) {
93+
TableName tableName = TableName.fromString(table.getTableName(), table.getCatName(), table.getDbName());
94+
return tableCache.get(tableName, key -> IcebergTableUtil.getTable(conf, table));
11195
}
11296

11397
/**

iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/metastore/task/TestIcebergHouseKeeperService.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@
2323
import java.util.Collections;
2424
import java.util.List;
2525
import java.util.Optional;
26-
import org.apache.hadoop.hive.common.TableName;
2726
import org.apache.hadoop.hive.conf.HiveConf;
2827
import org.apache.hadoop.hive.metastore.TableType;
2928
import org.apache.hadoop.hive.metastore.api.FieldSchema;
3029
import org.apache.hadoop.hive.metastore.api.GetTableRequest;
30+
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
3131
import org.apache.hadoop.hive.metastore.utils.TableFetcher;
3232
import org.apache.hadoop.hive.ql.metadata.Hive;
3333
import org.apache.hadoop.hive.ql.metadata.Table;
@@ -69,8 +69,11 @@ public void testIcebergTableFetched() throws Exception {
6969

7070
TableFetcher tableFetcher = IcebergTableUtil.getTableFetcher(db.getMSC(), null, "default", "*");
7171

72-
List<TableName> tables = tableFetcher.getTables();
73-
Assert.assertEquals(new TableName("hive", "default", "iceberg_table"), tables.get(0));
72+
int maxBatchSize = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
73+
List<org.apache.hadoop.hive.metastore.api.Table> tables = tableFetcher.getTables(maxBatchSize);
74+
Assert.assertEquals("hive", tables.get(0).getCatName());
75+
Assert.assertEquals("default", tables.get(0).getDbName());
76+
Assert.assertEquals("iceberg_table", tables.get(0).getTableName());
7477
}
7578

7679
@Test

standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/utils/TableFetcher.java

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@
2020
import com.google.common.annotations.VisibleForTesting;
2121
import org.apache.hadoop.hive.common.TableName;
2222
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
23+
import org.apache.hadoop.hive.metastore.TableIterable;
2324
import org.apache.hadoop.hive.metastore.TableType;
2425
import org.apache.hadoop.hive.metastore.Warehouse;
2526
import org.apache.hadoop.hive.metastore.api.Database;
27+
import org.apache.hadoop.hive.metastore.api.Table;
2628
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
2729
import org.slf4j.Logger;
2830
import org.slf4j.LoggerFactory;
@@ -90,7 +92,7 @@ private void buildTableFilter(String tablePattern, List<String> conditions) {
9092
this.tableFilter = String.join(" and ", conditions);
9193
}
9294

93-
public List<TableName> getTables() throws Exception {
95+
public List<TableName> getTableNames() throws Exception {
9496
List<TableName> candidates = new ArrayList<>();
9597

9698
// if tableTypes is empty, then a list with single empty string has to specified to scan no tables.
@@ -102,21 +104,47 @@ public List<TableName> getTables() throws Exception {
102104
List<String> databases = client.getDatabases(catalogName, dbPattern);
103105

104106
for (String db : databases) {
105-
Database database = client.getDatabase(catalogName, db);
106-
if (MetaStoreUtils.checkIfDbNeedsToBeSkipped(database)) {
107-
LOG.debug("Skipping table under database: {}", db);
108-
continue;
109-
}
110-
if (MetaStoreUtils.isDbBeingPlannedFailedOver(database)) {
111-
LOG.info("Skipping table that belongs to database {} being failed over.", db);
112-
continue;
113-
}
114-
List<String> tablesNames = client.listTableNamesByFilter(catalogName, db, tableFilter, -1);
107+
List<String> tablesNames = getTableNamesForDatabase(catalogName, db);
115108
tablesNames.forEach(tablesName -> candidates.add(TableName.fromString(tablesName, catalogName, db)));
116109
}
117110
return candidates;
118111
}
119112

113+
public List<Table> getTables(int maxBatchSize) throws Exception {
114+
List<Table> candidates = new ArrayList<>();
115+
116+
// if tableTypes is empty, then a list with single empty string has to specified to scan no tables.
117+
if (tableTypes.isEmpty()) {
118+
LOG.info("Table fetcher returns empty list as no table types specified");
119+
return candidates;
120+
}
121+
122+
List<String> databases = client.getDatabases(catalogName, dbPattern);
123+
124+
for (String db : databases) {
125+
List<String> tablesNames = getTableNamesForDatabase(catalogName, db);
126+
for (Table table : new TableIterable(client, db, tablesNames, maxBatchSize)) {
127+
candidates.add(table);
128+
}
129+
}
130+
return candidates;
131+
}
132+
133+
private List<String> getTableNamesForDatabase(String catalogName, String dbName) throws Exception {
134+
List<String> tableNames = new ArrayList<>();
135+
Database database = client.getDatabase(catalogName, dbName);
136+
if (MetaStoreUtils.checkIfDbNeedsToBeSkipped(database)) {
137+
LOG.debug("Skipping table under database: {}", dbName);
138+
return tableNames;
139+
}
140+
if (MetaStoreUtils.isDbBeingPlannedFailedOver(database)) {
141+
LOG.info("Skipping table that belongs to database {} being failed over.", dbName);
142+
return tableNames;
143+
}
144+
tableNames = client.listTableNamesByFilter(catalogName, dbName, tableFilter, -1);
145+
return tableNames;
146+
}
147+
120148
public static class Builder {
121149
private final IMetaStoreClient client;
122150
private final String catalogName;

standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ public void run() {
101101
.tableCondition(
102102
hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS + "discover__partitions like \"true\" ")
103103
.build()
104-
.getTables();
104+
.getTableNames();
105105

106106
if (candidates.isEmpty()) {
107107
LOG.info("Got empty table list in catalog: {}, dbPattern: {}", catalogName, dbPattern);

0 commit comments

Comments
 (0)