1
0

[HUDI-1486] Remove inline inflight rollback in hoodie writer (#2359)

1. Refactor rollback and move cleaning failed commits logic into cleaner
2. Introduce hoodie heartbeat to ascertain failed commits
3. Fix test cases
This commit is contained in:
n3nash
2021-02-19 20:12:22 -08:00
committed by GitHub
parent c9fcf964b2
commit ffcfb58bac
64 changed files with 1541 additions and 306 deletions

View File

@@ -69,7 +69,7 @@ public class HoodieCleaner {
public void run() {
HoodieWriteConfig hoodieCfg = getHoodieClientConfig();
SparkRDDWriteClient client = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jssc), hoodieCfg, false);
SparkRDDWriteClient client = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jssc), hoodieCfg);
client.clean();
}

View File

@@ -85,11 +85,11 @@ public class HoodieSnapshotCopier implements Serializable {
final SerializableConfiguration serConf = new SerializableConfiguration(jsc.hadoopConfiguration());
final HoodieTableMetaClient tableMetadata = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(baseDir).build();
final BaseFileOnlyView fsView = new HoodieTableFileSystemView(tableMetadata,
tableMetadata.getActiveTimeline().getCommitsAndCompactionTimeline().filterCompletedInstants());
tableMetadata.getActiveTimeline().getWriteTimeline().filterCompletedInstants());
HoodieEngineContext context = new HoodieSparkEngineContext(jsc);
// Get the latest commit
Option<HoodieInstant> latestCommit =
tableMetadata.getActiveTimeline().getCommitsAndCompactionTimeline().filterCompletedInstants().lastInstant();
tableMetadata.getActiveTimeline().getWriteTimeline().filterCompletedInstants().lastInstant();
if (!latestCommit.isPresent()) {
LOG.warn("No commits present. Nothing to snapshot");
return;

View File

@@ -149,7 +149,7 @@ public class HoodieSnapshotExporter {
private Option<String> getLatestCommitTimestamp(FileSystem fs, Config cfg) {
final HoodieTableMetaClient tableMetadata = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(cfg.sourceBasePath).build();
Option<HoodieInstant> latestCommit = tableMetadata.getActiveTimeline().getCommitsAndCompactionTimeline()
Option<HoodieInstant> latestCommit = tableMetadata.getActiveTimeline().getWriteTimeline()
.filterCompletedInstants().lastInstant();
return latestCommit.isPresent() ? Option.of(latestCommit.get().getTimestamp()) : Option.empty();
}
@@ -261,7 +261,7 @@ public class HoodieSnapshotExporter {
FileSystem fs = FSUtils.getFs(cfg.sourceBasePath, jsc.hadoopConfiguration());
HoodieTableMetaClient tableMetadata = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(cfg.sourceBasePath).build();
return new HoodieTableFileSystemView(tableMetadata, tableMetadata
.getActiveTimeline().getCommitsAndCompactionTimeline().filterCompletedInstants());
.getActiveTimeline().getWriteTimeline().filterCompletedInstants());
}
public static void main(String[] args) throws IOException {

View File

@@ -138,7 +138,7 @@ public class BootstrapExecutor implements Serializable {
*/
public void execute() throws IOException {
initializeTable();
SparkRDDWriteClient bootstrapClient = new SparkRDDWriteClient(new HoodieSparkEngineContext(jssc), bootstrapConfig, true);
SparkRDDWriteClient bootstrapClient = new SparkRDDWriteClient(new HoodieSparkEngineContext(jssc), bootstrapConfig);
try {
HashMap<String, String> checkpointCommitMetadata = new HashMap<>();

View File

@@ -598,7 +598,7 @@ public class DeltaSync implements Serializable {
// Close Write client.
writeClient.close();
}
writeClient = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jssc), hoodieCfg, true, embeddedTimelineService);
writeClient = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jssc), hoodieCfg, embeddedTimelineService);
onInitializingHoodieWriteClient.apply(writeClient);
}