[HUDI-3107] Fix HiveSyncTool drop partitions using JDBC or hivesql or hms (#4453)
* constructDropPartitions when drop partitions using jdbc * done * done * code style * code review Co-authored-by: yuezhang <yuezhang@freewheel.tv>
This commit is contained in:
@@ -20,6 +20,7 @@ package org.apache.hudi.hive;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.WriteOperationType;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.testutils.NetworkTestUtils;
|
||||
import org.apache.hudi.common.testutils.SchemaTestUtil;
|
||||
@@ -787,6 +788,47 @@ public class TestHiveSyncTool {
|
||||
"Table should have 1 partition because of the drop 1 partition");
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("syncMode")
|
||||
public void testDropPartition(String syncMode) throws Exception {
|
||||
hiveSyncConfig.syncMode = syncMode;
|
||||
HiveTestUtil.hiveSyncConfig.batchSyncNum = 3;
|
||||
String instantTime = "100";
|
||||
HiveTestUtil.createCOWTable(instantTime, 1, true);
|
||||
|
||||
HoodieHiveClient hiveClient =
|
||||
new HoodieHiveClient(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
|
||||
assertFalse(hiveClient.doesTableExist(hiveSyncConfig.tableName),
|
||||
"Table " + hiveSyncConfig.tableName + " should not exist initially");
|
||||
// Lets do the sync
|
||||
HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
// we need renew the hiveclient after tool.syncHoodieTable(), because it will close hive
|
||||
// session, then lead to connection retry, we can see there is a exception at log.
|
||||
hiveClient =
|
||||
new HoodieHiveClient(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
|
||||
assertTrue(hiveClient.doesTableExist(HiveTestUtil.hiveSyncConfig.tableName),
|
||||
"Table " + HiveTestUtil.hiveSyncConfig.tableName + " should exist after sync completes");
|
||||
assertEquals(hiveClient.getTableSchema(HiveTestUtil.hiveSyncConfig.tableName).size(),
|
||||
hiveClient.getDataSchema().getColumns().size() + 1,
|
||||
"Hive Schema should match the table schema + partition field");
|
||||
assertEquals(1, hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size(),
|
||||
"Table partitions should match the number of partitions we wrote");
|
||||
assertEquals(instantTime, hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get(),
|
||||
"The last commit that was synced should be updated in the TBLPROPERTIES");
|
||||
// create a replace commit to delete current partitions
|
||||
HiveTestUtil.createReplaceCommit("101", "2021/12/28", WriteOperationType.DELETE_PARTITION);
|
||||
|
||||
// sync drop partitins
|
||||
tool = new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
|
||||
hiveClient = new HoodieHiveClient(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
|
||||
List<Partition> hivePartitions = hiveClient.scanTablePartitions(hiveSyncConfig.tableName);
|
||||
assertEquals(0, hivePartitions.size(),
|
||||
"Table should have 0 partition because of the drop the only one partition");
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("syncMode")
|
||||
public void testNonPartitionedSync(String syncMode) throws Exception {
|
||||
|
||||
@@ -28,8 +28,10 @@ import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieDeltaWriteStat;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
import org.apache.hudi.common.model.WriteOperationType;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.log.HoodieLogFormat;
|
||||
import org.apache.hudi.common.table.log.HoodieLogFormat.Writer;
|
||||
@@ -176,6 +178,16 @@ public class HiveTestUtil {
|
||||
createCommitFile(commitMetadata, instantTime);
|
||||
}
|
||||
|
||||
public static void createReplaceCommit(String instantTime, String partitions, WriteOperationType type)
|
||||
throws IOException {
|
||||
HoodieReplaceCommitMetadata replaceCommitMetadata = new HoodieReplaceCommitMetadata();
|
||||
replaceCommitMetadata.setOperationType(type);
|
||||
Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
|
||||
partitionToReplaceFileIds.put(partitions, new ArrayList<>());
|
||||
replaceCommitMetadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
|
||||
createReplaceCommitFile(replaceCommitMetadata, instantTime);
|
||||
}
|
||||
|
||||
public static void createCOWTableWithSchema(String instantTime, String schemaFileName)
|
||||
throws IOException, URISyntaxException {
|
||||
Path path = new Path(hiveSyncConfig.basePath);
|
||||
@@ -442,6 +454,15 @@ public class HiveTestUtil {
|
||||
fsout.close();
|
||||
}
|
||||
|
||||
public static void createReplaceCommitFile(HoodieCommitMetadata commitMetadata, String instantTime) throws IOException {
|
||||
byte[] bytes = commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8);
|
||||
Path fullPath = new Path(hiveSyncConfig.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
||||
+ HoodieTimeline.makeReplaceFileName(instantTime));
|
||||
FSDataOutputStream fsout = fileSystem.create(fullPath, true);
|
||||
fsout.write(bytes);
|
||||
fsout.close();
|
||||
}
|
||||
|
||||
public static void createCommitFileWithSchema(HoodieCommitMetadata commitMetadata, String instantTime, boolean isSimpleSchema) throws IOException {
|
||||
addSchemaToCommitMetadata(commitMetadata, isSimpleSchema, true);
|
||||
createCommitFile(commitMetadata, instantTime);
|
||||
|
||||
Reference in New Issue
Block a user