1
0

[HUDI-3107] Fix HiveSyncTool drop partitions using JDBC or hivesql or hms (#4453)

* constructDropPartitions when drop partitions using jdbc

* done

* done

* code style

* code review

Co-authored-by: yuezhang <yuezhang@freewheel.tv>
This commit is contained in:
YueZhang
2021-12-31 15:56:33 +08:00
committed by GitHub
parent 2444f40a4b
commit ef9923fc55
7 changed files with 169 additions and 8 deletions

View File

@@ -20,6 +20,7 @@ package org.apache.hudi.hive;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.testutils.NetworkTestUtils;
import org.apache.hudi.common.testutils.SchemaTestUtil;
@@ -787,6 +788,47 @@ public class TestHiveSyncTool {
"Table should have 1 partition because of the drop 1 partition");
}
/**
 * Checks that a {@code DELETE_PARTITION} replace commit makes the next Hive sync drop the
 * partition from the Hive table.
 *
 * <p>Flow: create a COW table with a single partition, sync it, assert the table, schema,
 * partition count and last-synced commit in Hive, then write a replace commit that deletes the
 * partition, sync again and assert that Hive reports zero partitions.
 *
 * @param syncMode the sync mode under test, supplied by the {@code syncMode} method source
 * @throws Exception if table creation, syncing or any Hive call fails
 */
@ParameterizedTest
@MethodSource("syncMode")
public void testDropPartition(String syncMode) throws Exception {
  hiveSyncConfig.syncMode = syncMode;
  HiveTestUtil.hiveSyncConfig.batchSyncNum = 3;
  String instantTime = "100";
  HiveTestUtil.createCOWTable(instantTime, 1, true);
  HoodieHiveClient hiveClient =
      new HoodieHiveClient(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
  assertFalse(hiveClient.doesTableExist(hiveSyncConfig.tableName),
      "Table " + hiveSyncConfig.tableName + " should not exist initially");

  // Let's do the sync.
  HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
  tool.syncHoodieTable();

  // We need to renew the Hive client after tool.syncHoodieTable(), because the sync closes the
  // Hive session; reusing the old client leads to a connection retry and an exception in the log.
  hiveClient =
      new HoodieHiveClient(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
  assertTrue(hiveClient.doesTableExist(HiveTestUtil.hiveSyncConfig.tableName),
      "Table " + HiveTestUtil.hiveSyncConfig.tableName + " should exist after sync completes");

  // Locals keep the original evaluation order while letting assertEquals receive
  // (expected, actual) in the conventional order, so failure messages read correctly.
  int hiveSchemaSize = hiveClient.getTableSchema(HiveTestUtil.hiveSyncConfig.tableName).size();
  int expectedSchemaSize = hiveClient.getDataSchema().getColumns().size() + 1;
  assertEquals(expectedSchemaSize, hiveSchemaSize,
      "Hive Schema should match the table schema + partition field");
  assertEquals(1, hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size(),
      "Table partitions should match the number of partitions we wrote");
  assertEquals(instantTime, hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get(),
      "The last commit that was synced should be updated in the TBLPROPERTIES");

  // Create a replace commit to delete the current partition.
  // NOTE(review): the partition path is hard-coded; it presumably has to match the partition
  // written by createCOWTable above - confirm that helper does not derive paths from the
  // current date, otherwise this test only drops the partition on one specific day.
  HiveTestUtil.createReplaceCommit("101", "2021/12/28", WriteOperationType.DELETE_PARTITION);

  // Sync again so the dropped partition is propagated to Hive.
  tool = new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
  tool.syncHoodieTable();

  hiveClient = new HoodieHiveClient(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
  List<Partition> hivePartitions = hiveClient.scanTablePartitions(hiveSyncConfig.tableName);
  assertEquals(0, hivePartitions.size(),
      "Table should have 0 partition because of the drop the only one partition");
}
@ParameterizedTest
@MethodSource("syncMode")
public void testNonPartitionedSync(String syncMode) throws Exception {

View File

@@ -28,8 +28,10 @@ import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieDeltaWriteStat;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.HoodieLogFormat.Writer;
@@ -176,6 +178,16 @@ public class HiveTestUtil {
createCommitFile(commitMetadata, instantTime);
}
/**
 * Builds replace-commit metadata for a single partition and writes it out as a replace commit
 * file for the given instant.
 *
 * <p>The partition is recorded with an empty list of replaced file ids.
 *
 * @param instantTime instant time used to name the replace commit file
 * @param partitions  partition path to register in the replace commit metadata
 * @param type        write operation type stored in the metadata
 * @throws IOException if writing the replace commit file fails
 */
public static void createReplaceCommit(String instantTime, String partitions, WriteOperationType type)
    throws IOException {
  Map<String, List<String>> replacedFileIdsByPartition = new HashMap<>();
  replacedFileIdsByPartition.put(partitions, new ArrayList<>());

  HoodieReplaceCommitMetadata metadata = new HoodieReplaceCommitMetadata();
  metadata.setOperationType(type);
  metadata.setPartitionToReplaceFileIds(replacedFileIdsByPartition);

  createReplaceCommitFile(metadata, instantTime);
}
public static void createCOWTableWithSchema(String instantTime, String schemaFileName)
throws IOException, URISyntaxException {
Path path = new Path(hiveSyncConfig.basePath);
@@ -442,6 +454,15 @@ public class HiveTestUtil {
fsout.close();
}
/**
 * Serializes the given commit metadata to JSON (UTF-8) and writes it as the replace commit file
 * for {@code instantTime} inside the table's meta folder under {@code hiveSyncConfig.basePath}.
 *
 * @param commitMetadata metadata whose JSON representation is written to the file
 * @param instantTime    instant time used to derive the replace commit file name
 * @throws IOException if the metadata cannot be serialized or the file cannot be written
 */
public static void createReplaceCommitFile(HoodieCommitMetadata commitMetadata, String instantTime) throws IOException {
  byte[] bytes = commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8);
  Path fullPath = new Path(hiveSyncConfig.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
      + HoodieTimeline.makeReplaceFileName(instantTime));
  // try-with-resources guarantees the stream is closed even when write() throws,
  // so a failed write no longer leaks the open file handle.
  try (FSDataOutputStream fsout = fileSystem.create(fullPath, true)) {
    fsout.write(bytes);
  }
}
public static void createCommitFileWithSchema(HoodieCommitMetadata commitMetadata, String instantTime, boolean isSimpleSchema) throws IOException {
addSchemaToCommitMetadata(commitMetadata, isSimpleSchema, true);
createCommitFile(commitMetadata, instantTime);