[HUDI-137] Hudi cleaning state changes should be consistent with compaction actions
Before this change, the Cleaner performed cleaning of old file versions and then stored the deleted files in .clean files. With this setup, we are not able to track file deletions if a cleaner fails after deleting files but before writing the .clean metadata. This is fine for regular file-system view generation, but incremental timeline syncing relies on clean/commit/compaction metadata to keep a consistent file-system view. Cleaner state transitions are now similar to those of compaction: 1. Requested: HoodieWriteClient.scheduleClean() selects the list of files that need to be deleted and stores them in metadata. 2. Inflight: HoodieWriteClient marks the state as inflight before it starts deleting. 3. Completed: HoodieWriteClient marks the state as completed after finishing the deletion according to the cleaner plan.
This commit is contained in:
committed by
Balaji Varadarajan
parent
23b303e4b1
commit
1032fc3e54
@@ -62,25 +62,30 @@ public class HiveSyncTool {
|
||||
}
|
||||
|
||||
public void syncHoodieTable() throws ClassNotFoundException {
|
||||
switch (hoodieHiveClient.getTableType()) {
|
||||
case COPY_ON_WRITE:
|
||||
syncHoodieTable(false);
|
||||
break;
|
||||
case MERGE_ON_READ:
|
||||
// sync a RO table for MOR
|
||||
syncHoodieTable(false);
|
||||
String originalTableName = cfg.tableName;
|
||||
// TODO : Make realtime table registration optional using a config param
|
||||
cfg.tableName = cfg.tableName + SUFFIX_REALTIME_TABLE;
|
||||
// sync a RT table for MOR
|
||||
syncHoodieTable(true);
|
||||
cfg.tableName = originalTableName;
|
||||
break;
|
||||
default:
|
||||
LOG.error("Unknown table type " + hoodieHiveClient.getTableType());
|
||||
throw new InvalidDatasetException(hoodieHiveClient.getBasePath());
|
||||
try {
|
||||
switch (hoodieHiveClient.getTableType()) {
|
||||
case COPY_ON_WRITE:
|
||||
syncHoodieTable(false);
|
||||
break;
|
||||
case MERGE_ON_READ:
|
||||
// sync a RO table for MOR
|
||||
syncHoodieTable(false);
|
||||
String originalTableName = cfg.tableName;
|
||||
// TODO : Make realtime table registration optional using a config param
|
||||
cfg.tableName = cfg.tableName + SUFFIX_REALTIME_TABLE;
|
||||
// sync a RT table for MOR
|
||||
syncHoodieTable(true);
|
||||
cfg.tableName = originalTableName;
|
||||
break;
|
||||
default:
|
||||
LOG.error("Unknown table type " + hoodieHiveClient.getTableType());
|
||||
throw new InvalidDatasetException(hoodieHiveClient.getBasePath());
|
||||
}
|
||||
} catch (RuntimeException re) {
|
||||
LOG.error("Got runtime exception when hive syncing", re);
|
||||
} finally {
|
||||
hoodieHiveClient.close();
|
||||
}
|
||||
hoodieHiveClient.close();
|
||||
}
|
||||
|
||||
private void syncHoodieTable(boolean isRealTime) throws ClassNotFoundException {
|
||||
|
||||
Reference in New Issue
Block a user