1
0

[HUDI-1932] Update Hive sync timestamp when change detected (#3053)

* Update Hive sync timestamp when change detected

Only update the last commit timestamp on the Hive table when the table schema
has changed or a partition is created/updated.

When AWS Glue Data Catalog is used as the metastore for Hive, this ensures
that every new table version is substantive (i.e. it includes schema and/or
partition changes). Prior to this change, a Hive sync performed without schema
or partition changes would still publish a new version of the table in the
Glue Data Catalog, with the only change being the timestamp property.

https://issues.apache.org/jira/browse/HUDI-1932

* add conditional sync flag

* fix testSyncWithoutDiffs

* fix HiveSyncConfig

Co-authored-by: Raymond Xu <2701446+xushiyan@users.noreply.github.com>
This commit is contained in:
Nate Radtke
2021-11-21 00:41:05 -06:00
committed by GitHub
parent 520538b15d
commit 887787e8b9
3 changed files with 51 additions and 5 deletions

View File

@@ -1017,4 +1017,35 @@ public class TestHiveSyncTool {
.containsValue("BIGINT"), errorMsg);
ddlExecutor.runSQL(dropTableSql);
}
/**
 * Runs two consecutive Hive syncs with {@code isConditionalSync} enabled and checks that the
 * last-commit-time-synced value read back for the snapshot table is "101" after each of them.
 *
 * @param syncMode value assigned to {@code hiveSyncConfig.syncMode}; provided by the
 *                 {@code syncMode} method source
 * @throws Exception propagated from the test utilities or the sync tool
 */
@ParameterizedTest
@MethodSource("syncMode")
public void testSyncWithoutDiffs(String syncMode) throws Exception {
  final String firstCommit = "100";
  final String syncedCommit = "101";
  final String laterCommit = "102";

  // NOTE(review): the bare hiveSyncConfig and HiveTestUtil.hiveSyncConfig are presumably the same
  // object (static import) — confirm against the file's imports.
  hiveSyncConfig.syncMode = syncMode;
  hiveSyncConfig.isConditionalSync = true;
  HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
  final String snapshotTable = HiveTestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;

  HiveTestUtil.createMORTable(firstCommit, syncedCommit, 2, true, true);

  // The client is built before the first sync runs, matching the original ordering.
  final HoodieHiveClient clientForFirstSync =
      new HoodieHiveClient(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
  final HiveSyncTool firstSync =
      new HiveSyncTool(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
  firstSync.syncHoodieTable();

  assertTrue(clientForFirstSync.doesTableExist(snapshotTable));
  assertEquals(syncedCommit, clientForFirstSync.getLastCommitTimeSynced(snapshotTable).get());

  // First argument is 0 — presumably the number of partitions to add, i.e. nothing for the
  // second sync to pick up; verify against HiveTestUtil.addMORPartitions.
  HiveTestUtil.addMORPartitions(0, true, true, true, ZonedDateTime.now().plusDays(2), syncedCommit, laterCommit);

  final HiveSyncTool secondSync =
      new HiveSyncTool(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
  secondSync.syncHoodieTable();

  // A fresh client is used to read the value back after the second sync.
  final HoodieHiveClient clientForSecondSync =
      new HoodieHiveClient(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
  assertEquals(syncedCommit, clientForSecondSync.getLastCommitTimeSynced(snapshotTable).get());
}
}