@@ -23,6 +23,7 @@
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -59,6 +60,7 @@
import java.util.List;
import java.util.Map;
import java.io.File;
import java.io.IOException;
import java.util.stream.Collectors;
import java.util.Arrays;

@@ -68,7 +70,9 @@
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.eq;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -846,16 +850,20 @@ public void testDropTableNoTablePathWritePermissionShouldFail() throws Exception
.build(conf);
hmsHandler.create_table(table);

Path tablePath = new Path(table.getSd().getLocation());
when(wh.isWritable(Mockito.eq(tablePath.getParent()))).thenReturn(true);
when(wh.isWritable(Mockito.eq(tablePath))).thenReturn(false);
FileStatus fileStatus = Mockito.mock(FileStatus.class);
when(fileStatus.isDirectory()).thenReturn(true);
when(fileStatus.getPath()).thenReturn(new Path(tblName));
when(wh.getFs(new Path(tblName)).listStatus(new Path(tblName))).thenReturn(new FileStatus[] { fileStatus });

doThrow(new IOException("Failed to delete directory:"))
.when(wh.getFs(new Path(tblName))).delete(any(Path.class), anyBoolean());

try {
hmsHandler.drop_table("default", tblName, true);
} catch (MetaException e) {
String expected = "%s metadata not deleted since %s is not writable by %s"
.formatted("Table", tablePath.toString(), authorizedUser);
assertEquals(expected, e.getMessage());
hmsHandler.drop_table(default_db, tblName, true);
fail("Expected exception to be thrown due to lack of write permission");
} catch (IOException e) {
String expected = "Failed to delete director:";
assertTrue(e.getMessage().contains(expected));
}
}
}
@@ -3050,17 +3050,9 @@ private boolean drop_table_core(final RawStore ms, final String catName, final S
firePreEvent(new PreDropTableEvent(tbl, deleteData, this));

tableDataShouldBeDeleted = checkTableDataShouldBeDeleted(tbl, deleteData);

if (tableDataShouldBeDeleted && tbl.getSd().getLocation() != null) {
@deniskuzZ (Member), Jul 31, 2025:

@zxl-333 initially, you added the permission check in HIVE-28804, mentioning that some Hive clusters don't use Ranger and now you're removing it?
I'm not sure I understand the reasoning.

I see you've added ms.rollbackTransaction(). So the flow is: make a DB call to drop the metadata, and then depending on whether the directory on the filesystem was successfully removed, you either commit or roll back, right?
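
(For illustration only, a minimal sketch of that flow, using the variables already in scope in drop_table_core such as ms, tblPath, ifPurge, db, and tbl; the RawStore calls are assumed to match the ones used elsewhere in this method, and this is not the exact patch code.)

// Sketch: commit the metadata drop only if the filesystem delete succeeded.
ms.openTransaction();
boolean committed = false;
try {
  ms.dropTable(catName, dbname, name);                                       // metadata removed inside the open transaction
  deleteTableData(tblPath, ifPurge, ReplChangeManager.shouldEnableCm(db, tbl)); // filesystem delete; may throw IOException
  committed = ms.commitTransaction();                                        // reached only when the delete succeeded
} finally {
  if (!committed) {
    ms.rollbackTransaction();                                                // keep the metadata if the data is still there
  }
}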

@zxl-333 (Contributor, Author), Aug 1, 2025:

@deniskuzZ When Ranger is not enabled, HIVE-28804 causes no problem. However, when Ranger is enabled, the HMS-side check cannot retrieve the authorization information held by Ranger, which leads to the following situation: even though write permission on the table has been granted through Ranger, if the HDFS ACLs do not also grant write access, the user gets a "not writable" error and the drop table fails.

What we need now is for dropping a table to succeed when the user has permission and to fail when the user does not, regardless of whether Ranger is enabled. The proposed solution is to skip the HDFS ACL check on the database and table directories and entrust the permission check at deletion time to the NameNode. If the deletion fails because of a permission problem, the metadata change is rolled back and the engine side receives the exception describing the failed deletion.
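
(A hedged sketch of the approach described above, not the exact patch: drop the client-side wh.isWritable() pre-check and let the NameNode, or the Ranger HDFS plugin when it is enabled, authorize the delete itself; if the delete is rejected, roll back the metastore transaction and surface the error. Variables such as wh, ms, and tblPath are the ones in scope in drop_table_core.)

// Sketch: rely on the NameNode for authorization instead of a client-side ACL check.
FileSystem fs = wh.getFs(tblPath);
try {
  fs.delete(tblPath, true);    // the NameNode (or Ranger HDFS plugin) authorizes this call
} catch (IOException e) {      // e.g. org.apache.hadoop.security.AccessControlException
  ms.rollbackTransaction();    // keep the metadata because the data was not removed
  throw new IOException("Failed to delete directory: " + tblPath, e);
}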

Member:

@zxl-333, please share a detailed step-by-step reproduction. I don't recall any customer using Ranger raising escalations related to this functionality.

Contributor:

@zxl-333 - Based on the scenario you described, I believe this is a Hive or Ranger configuration issue.
Regarding the Hive config: what is hive.server2.enable.doAs set to? It should be set to true. With that setting, assuming you run queries from a JDBC client such as Beeline, user 'hive' performs all the HDFS operations when an end user runs a query. So when an end user drops a table, user 'hive' tries to delete the data under the table path and its partitions. If user 'hive' has permission problems with that path, it points to a Ranger configuration issue.
Regarding the Ranger config: user 'hive' should be added to the HDFS service policy in Ranger for the Hive warehouse table path, and I believe the issue here is that this policy is missing from the HDFS service. Also, when Ranger policies are missing, the Hadoop ACL permissions take effect.

So I feel this is not a product issue, and you might have to abandon this patch.

@zxl-333 (Contributor, Author), Aug 17, 2025:

@saihemanth-cloudera @deniskuzZ
Without Ranger, refer to the steps in HIVE-18888. When Ranger is used, even if Ranger has granted the permission, the HDFS ACL write-permission check can still fail and prevent the table from being dropped.

The specific steps are as follows:
1. user_a has only read permission in HDFS on the test database and its tables:
hdfs://nn1/usr/warhouse/hive/test.db
2. Read and write permissions on the test database and its tables are then granted through Ranger.
3. If user_a attempts to drop the test_1 table in the test database, the following exception is thrown:
test_1 metadata not deleted since hdfs://nn//usr/warhouse/hive/test.db is not writable by user_a
This exception means the user cannot drop the table even though Ranger has granted the permission.

Therefore, regardless of whether the Ranger permission check is enabled, if the user does not have read or write permission on the database or table directories, the operation must fail and the metastore transaction must be rolled back.

Contributor:

From your comments, you seem to use Ranger only for metadata permissions and not for file permissions (the HDFS service is not used in Ranger for authorization, or there are no policies for user 'hive'). So you rely on Hadoop ACLs for file authorization. In that case, (2) as you described it does not apply, and (3) is expected to happen; it should happen in the absence of Ranger-related Hadoop policies.

Contributor Author:

Yes. Once Ranger is used to control HDFS file permissions, it is usually unnecessary to grant HDFS ACL permissions on top of that. Without this change, even after Ranger is used to control HDFS permissions, the user still has to be granted HDFS ACL permissions as well. That is unacceptable, because the user has to be authorized twice for the same table path.

Contributor:

@zxl-333 -- Can you please give me a detailed repro of this issue (ideally a doc with screenshots of the Ranger policies, the core-site.xml from the Hadoop configuration, etc.), as previously asked by @deniskuzZ as well?
Because I cannot follow this at all:

"then after using ranger to control HDFS permissions, it is still necessary to grant HDFS ACL permissions to the user."

If you are using Ranger to control HDFS permissions, then you don't need to grant the end user the HDFS permissions separately.
I'm guessing that you are missing this config in the core-site.xml of the Hadoop service:

<property>
  <name>hadoop.proxyuser.hive.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.hive.groups</name>
  <value>*</value>
</property>

Contributor Author:

The core-site.xml already has these parameters set.
Steps:
1. Create the database and table. (screenshot)
2. Check the HDFS permissions on the database and table directories. (screenshot)
3. Grant all permissions to the dmp_query user through Ranger. (screenshots)
4. Verify whether the dmp_query user has write permission. (screenshot)
5. Drop the table as the dmp_query user. (screenshot)
We observed that although Ranger had granted the permissions, the HDFS permission check still prevented the table from being deleted.

tblPath = new Path(tbl.getSd().getLocation());
String target = indexName == null ? "Table" : "Index table";
// HIVE-28804 drop table user should have table path and parent path permission
if (!wh.isWritable(tblPath.getParent())) {
throw new MetaException("%s metadata not deleted since %s is not writable by %s"
.formatted(target, tblPath.getParent(), SecurityUtils.getUser()));
} else if (!wh.isWritable(tblPath)) {
throw new MetaException("%s metadata not deleted since %s is not writable by %s"
.formatted(target, tblPath, SecurityUtils.getUser()));
}
}

// Drop the partitions and get a list of locations which need to be deleted
@@ -3084,17 +3076,22 @@ private boolean drop_table_core(final RawStore ms, final String catName, final S
this, isReplicated),
envContext);
}
if (tableDataShouldBeDeleted) {
// Data needs deletion. Check if trash may be skipped.
// Delete the data in the partitions which have other locations
deletePartitionData(partPaths, ifPurge, ReplChangeManager.shouldEnableCm(db, tbl));
// Delete the data in the table
deleteTableData(tblPath, ifPurge, ReplChangeManager.shouldEnableCm(db, tbl));
}
success = ms.commitTransaction();
}
} catch (Exception e) {
ms.rollbackTransaction();
LOG.error("Failed to drop table " + name + ", rolling back transaction", e);
throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
} finally {
if (!success) {
ms.rollbackTransaction();
} else if (tableDataShouldBeDeleted) {
// Data needs deletion. Check if trash may be skipped.
// Delete the data in the partitions which have other locations
deletePartitionData(partPaths, ifPurge, ReplChangeManager.shouldEnableCm(db, tbl));
// Delete the data in the table
deleteTableData(tblPath, ifPurge, ReplChangeManager.shouldEnableCm(db, tbl));
}

if (!listeners.isEmpty()) {
@@ -3124,7 +3121,7 @@ private boolean checkTableDataShouldBeDeleted(Table tbl, boolean deleteData) {
* data from warehouse
* @param shouldEnableCm If cm should be enabled
*/
private void deleteTableData(Path tablePath, boolean ifPurge, boolean shouldEnableCm) {
private void deleteTableData(Path tablePath, boolean ifPurge, boolean shouldEnableCm) throws IOException {
if (tablePath != null) {
deleteDataExcludeCmroot(tablePath, ifPurge, shouldEnableCm);
}
@@ -3158,7 +3155,7 @@ private void deleteTableData(Path tablePath, boolean ifPurge, Database db) {
* removing data from warehouse
* @param shouldEnableCm If cm should be enabled
*/
private void deletePartitionData(List<Path> partPaths, boolean ifPurge, boolean shouldEnableCm) {
private void deletePartitionData(List<Path> partPaths, boolean ifPurge, boolean shouldEnableCm) throws IOException {
if (partPaths != null && !partPaths.isEmpty()) {
for (Path partPath : partPaths) {
deleteDataExcludeCmroot(partPath, ifPurge, shouldEnableCm);
@@ -3197,7 +3194,7 @@ private void deletePartitionData(List<Path> partPaths, boolean ifPurge, Database
* removing data from warehouse
* @param shouldEnableCm If cm should be enabled
*/
private void deleteDataExcludeCmroot(Path path, boolean ifPurge, boolean shouldEnableCm) {
private void deleteDataExcludeCmroot(Path path, boolean ifPurge, boolean shouldEnableCm) throws IOException {
try {
if (shouldEnableCm) {
//Don't delete cmdir if its inside the partition path
@@ -3217,6 +3214,8 @@ private void deleteDataExcludeCmroot(Path path, boolean ifPurge, boolean shouldE
}
} catch (Exception e) {
LOG.error("Failed to delete directory: {}", path, e);
throw new IOException("Failed to delete directory: " + path.toString() + " . Exception detail : "
+ org.apache.hadoop.util.StringUtils.stringifyException(e));
}
}
