1
0

[HUDI-2116] Support batch synchronization of partition datas to hive metastore to avoid oom problem (#3209)

This commit is contained in:
xiarixiaoyao
2021-07-04 22:30:36 +08:00
committed by GitHub
parent 62a1ad8b3a
commit 6a71412f78
6 changed files with 51 additions and 7 deletions

View File

@@ -89,6 +89,7 @@ public class TestHiveSyncTool {
@MethodSource({"useJdbcAndSchemaFromCommitMetadata"})
public void testBasicSync(boolean useJdbc, boolean useSchemaFromCommitMetadata) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 3;
String instantTime = "100";
HiveTestUtil.createCOWTable(instantTime, 5, useSchemaFromCommitMetadata);
HoodieHiveClient hiveClient =
@@ -160,6 +161,7 @@ public class TestHiveSyncTool {
public void testSyncCOWTableWithProperties(boolean useJdbc,
boolean useSchemaFromCommitMetadata) throws Exception {
HiveSyncConfig hiveSyncConfig = HiveTestUtil.hiveSyncConfig;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 3;
Map<String, String> serdeProperties = new HashMap<String, String>() {
{
put("path", hiveSyncConfig.basePath);
@@ -214,6 +216,7 @@ public class TestHiveSyncTool {
public void testSyncMORTableWithProperties(boolean useJdbc,
boolean useSchemaFromCommitMetadata) throws Exception {
HiveSyncConfig hiveSyncConfig = HiveTestUtil.hiveSyncConfig;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 3;
Map<String, String> serdeProperties = new HashMap<String, String>() {
{
put("path", hiveSyncConfig.basePath);
@@ -312,6 +315,7 @@ public class TestHiveSyncTool {
@MethodSource("useJdbc")
public void testSyncIncremental(boolean useJdbc) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
String commitTime1 = "100";
HiveTestUtil.createCOWTable(commitTime1, 5, true);
HoodieHiveClient hiveClient =
@@ -351,6 +355,7 @@ public class TestHiveSyncTool {
@MethodSource("useJdbc")
public void testSyncIncrementalWithSchemaEvolution(boolean useJdbc) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
String commitTime1 = "100";
HiveTestUtil.createCOWTable(commitTime1, 5, true);
HoodieHiveClient hiveClient =
@@ -388,6 +393,7 @@ public class TestHiveSyncTool {
@MethodSource("useJdbcAndSchemaFromCommitMetadata")
public void testSyncMergeOnRead(boolean useJdbc, boolean useSchemaFromCommitMetadata) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
String instantTime = "100";
String deltaCommitTime = "101";
HiveTestUtil.createMORTable(instantTime, deltaCommitTime, 5, true,
@@ -454,6 +460,7 @@ public class TestHiveSyncTool {
@MethodSource("useJdbcAndSchemaFromCommitMetadata")
public void testSyncMergeOnReadRT(boolean useJdbc, boolean useSchemaFromCommitMetadata) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
String instantTime = "100";
String deltaCommitTime = "101";
String snapshotTableName = HiveTestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
@@ -524,6 +531,7 @@ public class TestHiveSyncTool {
@MethodSource("useJdbc")
public void testMultiPartitionKeySync(boolean useJdbc) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
String instantTime = "100";
HiveTestUtil.createCOWTable(instantTime, 5, true);
@@ -598,6 +606,7 @@ public class TestHiveSyncTool {
@MethodSource("useJdbc")
public void testNonPartitionedSync(boolean useJdbc) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
String instantTime = "100";
HiveTestUtil.createCOWTable(instantTime, 5, true);
@@ -627,6 +636,7 @@ public class TestHiveSyncTool {
@MethodSource("useJdbc")
public void testReadSchemaForMOR(boolean useJdbc) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
String commitTime = "100";
String snapshotTableName = HiveTestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
HiveTestUtil.createMORTable(commitTime, "", 5, false, true);
@@ -675,6 +685,7 @@ public class TestHiveSyncTool {
@Test
public void testConnectExceptionIgnoreConfigSet() throws IOException, URISyntaxException {
HiveTestUtil.hiveSyncConfig.useJdbc = true;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
String instantTime = "100";
HiveTestUtil.createCOWTable(instantTime, 5, false);
HoodieHiveClient hiveClient =
@@ -720,6 +731,7 @@ public class TestHiveSyncTool {
@MethodSource("useJdbc")
public void testPickingOlderParquetFileIfLatestIsEmptyCommit(boolean useJdbc) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
final String commitTime = "100";
HiveTestUtil.createCOWTable(commitTime, 1, true);
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
@@ -740,6 +752,7 @@ public class TestHiveSyncTool {
@MethodSource("useJdbc")
public void testNotPickingOlderParquetFileWhenLatestCommitReadFails(boolean useJdbc) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
final String commitTime = "100";
HiveTestUtil.createCOWTable(commitTime, 1, true);
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
@@ -782,6 +795,7 @@ public class TestHiveSyncTool {
@MethodSource("useJdbc")
public void testNotPickingOlderParquetFileWhenLatestCommitReadFailsForExistingTable(boolean useJdbc) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
final String commitTime = "100";
HiveTestUtil.createCOWTable(commitTime, 1, true);
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
@@ -828,6 +842,7 @@ public class TestHiveSyncTool {
@MethodSource("useJdbc")
public void testTypeConverter(boolean useJdbc) throws Exception {
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
HiveTestUtil.createCOWTable("100", 5, true);
HoodieHiveClient hiveClient =
new HoodieHiveClient(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);