
[HUDI-3370] The files recorded in the commit may not match the actual ones for MOR Compaction (#4753)

* use HoodieCommitMetadata to replace writeStatuses computation

Co-authored-by: yuezhang <yuezhang@freewheel.tv>
YueZhang
2022-02-14 11:12:52 +08:00
committed by GitHub
parent 55777fec05
commit 76e2faa28d
17 changed files with 129 additions and 114 deletions
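
Below is a minimal sketch (not part of the diff) of how a caller of the write client adapts to the reworked compaction API after this change. The client and instant variables are illustrative assumptions; the method signatures themselves come from the patch.

import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.spark.api.java.JavaRDD;

public class CompactionCallerSketch {
  // Hypothetical helper: runs and commits an already scheduled compaction instant.
  public static void compactAndCommit(SparkRDDWriteClient<?> writeClient, String compactionInstantTime) {
    // Before this patch: compact(...) returned JavaRDD<WriteStatus> and commitCompaction(...)
    // rebuilt the commit metadata from those statuses, which could drift from the files
    // actually written. After this patch: compact(...) returns HoodieWriteMetadata and the
    // commit reuses the HoodieCommitMetadata produced by the compaction itself.
    HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = writeClient.compact(compactionInstantTime);
    writeClient.commitCompaction(compactionInstantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
  }
}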

View File

@@ -883,7 +883,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
  * @param compactionInstantTime Compaction Instant Time
  * @return Collection of WriteStatus to inspect errors and counts
  */
-public O compact(String compactionInstantTime) {
+public HoodieWriteMetadata<O> compact(String compactionInstantTime) {
 return compact(compactionInstantTime, config.shouldAutoCommit());
 }
@@ -891,17 +891,16 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
  * Commit a compaction operation. Allow passing additional meta-data to be stored in commit instant file.
  *
  * @param compactionInstantTime Compaction Instant Time
- * @param writeStatuses Collection of WriteStatus to inspect errors and counts
+ * @param metadata All the metadata that gets stored along with a commit
  * @param extraMetadata Extra Metadata to be stored
  */
-public abstract void commitCompaction(String compactionInstantTime, O writeStatuses,
-    Option<Map<String, String>> extraMetadata) throws IOException;
+public abstract void commitCompaction(String compactionInstantTime, HoodieCommitMetadata metadata,
+    Option<Map<String, String>> extraMetadata);
 /**
  * Commit Compaction and track metrics.
  */
-protected abstract void completeCompaction(HoodieCommitMetadata metadata, O writeStatuses,
-    HoodieTable<T, I, K, O> table, String compactionCommitTime);
+protected abstract void completeCompaction(HoodieCommitMetadata metadata, HoodieTable<T, I, K, O> table, String compactionCommitTime);
 /**
  * Get inflight time line exclude compaction and clustering.
@@ -1023,7 +1022,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
  * @param compactionInstantTime Compaction Instant Time
  * @return Collection of Write Status
  */
-protected abstract O compact(String compactionInstantTime, boolean shouldComplete);
+protected abstract HoodieWriteMetadata<O> compact(String compactionInstantTime, boolean shouldComplete);
 /**
  * Performs a compaction operation on a table, serially before or after an insert/upsert action.

View File

@@ -39,7 +39,6 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieCommitException;
-import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieNotSupportedException;
 import org.apache.hudi.index.FlinkHoodieIndexFactory;
 import org.apache.hudi.index.HoodieIndex;
@@ -68,7 +67,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import java.io.IOException;
 import java.text.ParseException;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -346,23 +344,20 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
 @Override
 public void commitCompaction(
     String compactionInstantTime,
-    List<WriteStatus> writeStatuses,
-    Option<Map<String, String>> extraMetadata) throws IOException {
+    HoodieCommitMetadata metadata,
+    Option<Map<String, String>> extraMetadata) {
 HoodieFlinkTable<T> table = getHoodieTable();
-HoodieCommitMetadata metadata = CompactHelpers.getInstance().createCompactionMetadata(
-    table, compactionInstantTime, HoodieList.of(writeStatuses), config.getSchema());
 extraMetadata.ifPresent(m -> m.forEach(metadata::addMetadata));
-completeCompaction(metadata, writeStatuses, table, compactionInstantTime);
+completeCompaction(metadata, table, compactionInstantTime);
 }
 @Override
 public void completeCompaction(
     HoodieCommitMetadata metadata,
-    List<WriteStatus> writeStatuses,
     HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table,
     String compactionCommitTime) {
 this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status and commit compaction");
-List<HoodieWriteStat> writeStats = writeStatuses.stream().map(WriteStatus::getStat).collect(Collectors.toList());
+List<HoodieWriteStat> writeStats = metadata.getWriteStats();
 final HoodieInstant compactionInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionCommitTime);
 try {
 this.txnManager.beginTransaction(Option.of(compactionInstant), Option.empty());
@@ -391,16 +386,11 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
 }
 @Override
-protected List<WriteStatus> compact(String compactionInstantTime, boolean shouldComplete) {
+protected HoodieWriteMetadata<List<WriteStatus>> compact(String compactionInstantTime, boolean shouldComplete) {
 // only used for metadata table, the compaction happens in single thread
-try {
-List<WriteStatus> writeStatuses =
-    getHoodieTable().compact(context, compactionInstantTime).getWriteStatuses();
-commitCompaction(compactionInstantTime, writeStatuses, Option.empty());
-return writeStatuses;
-} catch (IOException e) {
-throw new HoodieException("Error while compacting instant: " + compactionInstantTime);
-}
+HoodieWriteMetadata<List<WriteStatus>> compactionMetadata = getHoodieTable().compact(context, compactionInstantTime);
+commitCompaction(compactionInstantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
+return compactionMetadata;
 }
 @Override

View File

@@ -44,7 +44,6 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
 import com.codahale.metrics.Timer;
 import org.apache.hadoop.conf.Configuration;
-import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -210,21 +209,20 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
 @Override
 public void commitCompaction(String compactionInstantTime,
-    List<WriteStatus> writeStatuses,
-    Option<Map<String, String>> extraMetadata) throws IOException {
+    HoodieCommitMetadata metadata,
+    Option<Map<String, String>> extraMetadata) {
 throw new HoodieNotSupportedException("CommitCompaction is not supported in HoodieJavaClient");
 }
 @Override
 protected void completeCompaction(HoodieCommitMetadata metadata,
-    List<WriteStatus> writeStatuses,
     HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table,
     String compactionCommitTime) {
 throw new HoodieNotSupportedException("CompleteCompaction is not supported in HoodieJavaClient");
 }
 @Override
-protected List<WriteStatus> compact(String compactionInstantTime,
+protected HoodieWriteMetadata<List<WriteStatus>> compact(String compactionInstantTime,
     boolean shouldComplete) {
 throw new HoodieNotSupportedException("Compact is not supported in HoodieJavaClient");
 }

View File

@@ -22,14 +22,17 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.model.HoodieWriteStat;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.table.action.HoodieWriteMetadata;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaRDD;
-import java.io.IOException;
+import java.util.List;
 public class HoodieSparkCompactor<T extends HoodieRecordPayload> extends BaseCompactor<T,
     JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> {
@@ -43,12 +46,12 @@ public class HoodieSparkCompactor<T extends HoodieRecordPayload> extends BaseCom
 }
 @Override
-public void compact(HoodieInstant instant) throws IOException {
+public void compact(HoodieInstant instant) {
 LOG.info("Compactor executing compaction " + instant);
 SparkRDDWriteClient<T> writeClient = (SparkRDDWriteClient<T>) compactionClient;
-JavaRDD<WriteStatus> res = writeClient.compact(instant.getTimestamp());
-this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status");
-long numWriteErrors = res.collect().stream().filter(WriteStatus::hasErrors).count();
+HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = writeClient.compact(instant.getTimestamp());
+List<HoodieWriteStat> writeStats = compactionMetadata.getCommitMetadata().get().getWriteStats();
+long numWriteErrors = writeStats.stream().mapToLong(HoodieWriteStat::getTotalWriteErrors).sum();
 if (numWriteErrors != 0) {
 // We treat even a single error in compaction as fatal
 LOG.error("Compaction for instant (" + instant + ") failed with write errors. Errors :" + numWriteErrors);
@@ -56,6 +59,6 @@ public class HoodieSparkCompactor<T extends HoodieRecordPayload> extends BaseCom
     "Compaction for instant (" + instant + ") failed with write errors. Errors :" + numWriteErrors);
 }
 // Commit compaction
-writeClient.commitCompaction(instant.getTimestamp(), res, Option.empty());
+writeClient.commitCompaction(instant.getTimestamp(), compactionMetadata.getCommitMetadata().get(), Option.empty());
 }
 }

View File

@@ -65,7 +65,6 @@ import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
-import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.text.ParseException;
 import java.util.List;
@@ -286,20 +285,18 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
 }
 @Override
-public void commitCompaction(String compactionInstantTime, JavaRDD<WriteStatus> writeStatuses, Option<Map<String, String>> extraMetadata) throws IOException {
+public void commitCompaction(String compactionInstantTime, HoodieCommitMetadata metadata, Option<Map<String, String>> extraMetadata) {
 HoodieSparkTable<T> table = HoodieSparkTable.create(config, context);
-HoodieCommitMetadata metadata = CompactHelpers.getInstance().createCompactionMetadata(
-    table, compactionInstantTime, HoodieJavaRDD.of(writeStatuses), config.getSchema());
 extraMetadata.ifPresent(m -> m.forEach(metadata::addMetadata));
-completeCompaction(metadata, writeStatuses, table, compactionInstantTime);
+completeCompaction(metadata, table, compactionInstantTime);
 }
 @Override
-protected void completeCompaction(HoodieCommitMetadata metadata, JavaRDD<WriteStatus> writeStatuses,
+protected void completeCompaction(HoodieCommitMetadata metadata,
     HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table,
     String compactionCommitTime) {
 this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status and commit compaction");
-List<HoodieWriteStat> writeStats = writeStatuses.map(WriteStatus::getStat).collect();
+List<HoodieWriteStat> writeStats = metadata.getWriteStats();
 final HoodieInstant compactionInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionCommitTime);
 try {
 this.txnManager.beginTransaction(Option.of(compactionInstant), Option.empty());
@@ -327,7 +324,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
 }
 @Override
-protected JavaRDD<WriteStatus> compact(String compactionInstantTime, boolean shouldComplete) {
+protected HoodieWriteMetadata<JavaRDD<WriteStatus>> compact(String compactionInstantTime, boolean shouldComplete) {
 HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, true);
 preWrite(compactionInstantTime, WriteOperationType.COMPACT, table.getMetaClient());
 HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
@@ -339,11 +336,10 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
 compactionTimer = metrics.getCompactionCtx();
 HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata =
     table.compact(context, compactionInstantTime);
-JavaRDD<WriteStatus> statuses = compactionMetadata.getWriteStatuses();
 if (shouldComplete && compactionMetadata.getCommitMetadata().isPresent()) {
-completeTableService(TableServiceType.COMPACT, compactionMetadata.getCommitMetadata().get(), statuses, table, compactionInstantTime);
+completeTableService(TableServiceType.COMPACT, compactionMetadata.getCommitMetadata().get(), table, compactionInstantTime);
 }
-return statuses;
+return compactionMetadata;
 }
 @Override
@@ -359,15 +355,14 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
 clusteringTimer = metrics.getClusteringCtx();
 LOG.info("Starting clustering at " + clusteringInstant);
 HoodieWriteMetadata<JavaRDD<WriteStatus>> clusteringMetadata = table.cluster(context, clusteringInstant);
-JavaRDD<WriteStatus> statuses = clusteringMetadata.getWriteStatuses();
 // TODO : Where is shouldComplete used ?
 if (shouldComplete && clusteringMetadata.getCommitMetadata().isPresent()) {
-completeTableService(TableServiceType.CLUSTER, clusteringMetadata.getCommitMetadata().get(), statuses, table, clusteringInstant);
+completeTableService(TableServiceType.CLUSTER, clusteringMetadata.getCommitMetadata().get(), table, clusteringInstant);
 }
 return clusteringMetadata;
 }
-private void completeClustering(HoodieReplaceCommitMetadata metadata, JavaRDD<WriteStatus> writeStatuses,
+private void completeClustering(HoodieReplaceCommitMetadata metadata,
     HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table,
     String clusteringCommitTime) {
@@ -469,16 +464,16 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
 }
 // TODO : To enforce priority between table service and ingestion writer, use transactions here and invoke strategy
-private void completeTableService(TableServiceType tableServiceType, HoodieCommitMetadata metadata, JavaRDD<WriteStatus> writeStatuses,
+private void completeTableService(TableServiceType tableServiceType, HoodieCommitMetadata metadata,
     HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table,
     String commitInstant) {
 switch (tableServiceType) {
 case CLUSTER:
-completeClustering((HoodieReplaceCommitMetadata) metadata, writeStatuses, table, commitInstant);
+completeClustering((HoodieReplaceCommitMetadata) metadata, table, commitInstant);
 break;
 case COMPACT:
-completeCompaction(metadata, writeStatuses, table, commitInstant);
+completeCompaction(metadata, table, commitInstant);
 break;
 default:
 throw new IllegalArgumentException("This table service is not valid " + tableServiceType);

View File

@@ -38,6 +38,7 @@ import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieLockConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieWriteConflictException;
+import org.apache.hudi.table.action.HoodieWriteMetadata;
 import org.apache.hudi.testutils.HoodieClientTestBase;
 import org.apache.hadoop.fs.Path;
@@ -364,8 +365,8 @@ public class TestHoodieClientMultiWriter extends HoodieClientTestBase {
 latchCountDownAndWait(runCountDownLatch, 30000);
 if (tableType == HoodieTableType.MERGE_ON_READ) {
 assertDoesNotThrow(() -> {
-JavaRDD<WriteStatus> writeStatusJavaRDD = (JavaRDD<WriteStatus>) client2.compact("005");
-client2.commitCompaction("005", writeStatusJavaRDD, Option.empty());
+HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client2.compact("005");
+client2.commitCompaction("005", compactionMetadata.getCommitMetadata().get(), Option.empty());
 validInstants.add("005");
 });
 }

View File

@@ -45,6 +45,7 @@ import org.apache.hudi.index.HoodieIndex;
 import org.apache.hudi.index.HoodieIndex.IndexType;
 import org.apache.hudi.metadata.HoodieTableMetadataWriter;
 import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter;
+import org.apache.hudi.table.action.HoodieWriteMetadata;
 import org.apache.hudi.table.action.deltacommit.BaseSparkDeltaCommitActionExecutor;
 import org.apache.hudi.table.action.deltacommit.SparkDeleteDeltaCommitActionExecutor;
 import org.apache.hudi.testutils.HoodieClientTestUtils;
@@ -258,7 +259,7 @@ public class TestHoodieMergeOnReadTable extends SparkClientFunctionalTestHarness
 // Do a compaction
 String compactionInstantTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
-JavaRDD<WriteStatus> result = (JavaRDD<WriteStatus>) writeClient.compact(compactionInstantTime);
+HoodieWriteMetadata<JavaRDD<WriteStatus>> result = writeClient.compact(compactionInstantTime);
 // Verify that recently written compacted data file has no log file
 metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -275,8 +276,7 @@ public class TestHoodieMergeOnReadTable extends SparkClientFunctionalTestHarness
 for (FileSlice slice : groupedLogFiles) {
 assertEquals(0, slice.getLogFiles().count(), "After compaction there should be no log files visible on a full view");
 }
-List<WriteStatus> writeStatuses = result.collect();
-assertTrue(writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPartitionPath().contentEquals(partitionPath)));
+assertTrue(result.getCommitMetadata().get().getWritePartitionPaths().stream().anyMatch(part -> part.contentEquals(partitionPath)));
 }
 // Check the entire dataset has all records still
@@ -442,8 +442,9 @@ public class TestHoodieMergeOnReadTable extends SparkClientFunctionalTestHarness
 // Test small file handling after compaction
 instantTime = "002";
 client.scheduleCompactionAtInstant(instantTime, Option.of(metadata.getExtraMetadata()));
-statuses = (JavaRDD<WriteStatus>) client.compact(instantTime);
-client.commitCompaction(instantTime, statuses, Option.empty());
+HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(instantTime);
+statuses = compactionMetadata.getWriteStatuses();
+client.commitCompaction(instantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
 // Read from commit file
 table = HoodieSparkTable.create(cfg, context());

View File

@@ -28,6 +28,7 @@ import org.apache.hudi.common.model.HoodieFileFormat;
 import org.apache.hudi.common.model.HoodieLogFile;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.model.HoodieWriteStat;
 import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
@@ -41,6 +42,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.index.HoodieIndex;
 import org.apache.hudi.table.HoodieSparkTable;
 import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.table.action.HoodieWriteMetadata;
 import org.apache.hudi.testutils.HoodieClientTestUtils;
 import org.apache.hudi.testutils.HoodieMergeOnReadTestUtils;
 import org.apache.hudi.testutils.SparkClientFunctionalTestHarness;
@@ -56,6 +58,7 @@ import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
 import org.junit.jupiter.params.provider.ValueSource;
+import java.util.Collection;
 import java.util.List;
 import java.util.Properties;
 import java.util.stream.Collectors;
@@ -307,11 +310,12 @@ public class TestHoodieSparkMergeOnReadTableInsertUpdateDelete extends SparkClie
 assertTrue(numLogFiles > 0);
 // Do a compaction
 String instantTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
-statuses = (JavaRDD<WriteStatus>) writeClient.compact(instantTime);
+HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = writeClient.compact(instantTime);
 String extension = table.getBaseFileExtension();
-assertEquals(numLogFiles, statuses.map(status -> status.getStat().getPath().contains(extension)).count());
-assertEquals(numLogFiles, statuses.count());
-writeClient.commitCompaction(instantTime, statuses, Option.empty());
+Collection<List<HoodieWriteStat>> stats = compactionMetadata.getCommitMetadata().get().getPartitionToWriteStats().values();
+assertEquals(numLogFiles, stats.stream().flatMap(Collection::stream).filter(state -> state.getPath().contains(extension)).count());
+assertEquals(numLogFiles, stats.stream().mapToLong(Collection::size).sum());
+writeClient.commitCompaction(instantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
 }
 }
 }

View File

@@ -30,6 +30,7 @@ import org.apache.hudi.common.model.HoodieFileGroup;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.model.HoodieWriteStat;
 import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.marker.MarkerType;
@@ -50,6 +51,7 @@ import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.index.HoodieIndex;
 import org.apache.hudi.table.HoodieSparkTable;
 import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.table.action.HoodieWriteMetadata;
 import org.apache.hudi.testutils.HoodieMergeOnReadTestUtils;
 import org.apache.hudi.testutils.SparkClientFunctionalTestHarness;
@@ -455,8 +457,8 @@ public class TestHoodieSparkMergeOnReadTableRollback extends SparkClientFunction
 compactionInstantTime = "006";
 client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
-JavaRDD<WriteStatus> ws = (JavaRDD<WriteStatus>) client.compact(compactionInstantTime);
-client.commitCompaction(compactionInstantTime, ws, Option.empty());
+HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(compactionInstantTime);
+client.commitCompaction(compactionInstantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
 allFiles = listAllBaseFilesInPath(hoodieTable);
 metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -543,8 +545,8 @@ public class TestHoodieSparkMergeOnReadTableRollback extends SparkClientFunction
 metaClient = HoodieTableMetaClient.reload(metaClient);
 String compactionInstantTime = "005";
 client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
-JavaRDD<WriteStatus> ws = (JavaRDD<WriteStatus>) client.compact(compactionInstantTime);
-client.commitCompaction(compactionInstantTime, ws, Option.empty());
+HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(compactionInstantTime);
+client.commitCompaction(compactionInstantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
 validateRecords(cfg, metaClient, updates3);
 List<HoodieRecord> updates4 = updateAndGetRecords("006", client, dataGen, records);
@@ -755,11 +757,14 @@ public class TestHoodieSparkMergeOnReadTableRollback extends SparkClientFunction
 assertTrue(numLogFiles > 0);
 // Do a compaction
 newCommitTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
-statuses = (JavaRDD<WriteStatus>) writeClient.compact(newCommitTime);
+HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = writeClient.compact(newCommitTime);
+statuses = compactionMetadata.getWriteStatuses();
 // Ensure all log files have been compacted into base files
 String extension = table.getBaseFileExtension();
-assertEquals(numLogFiles, statuses.map(status -> status.getStat().getPath().contains(extension)).count());
-assertEquals(numLogFiles, statuses.count());
+Collection<List<HoodieWriteStat>> stats = compactionMetadata.getCommitMetadata().get().getPartitionToWriteStats().values();
+assertEquals(numLogFiles, stats.stream().flatMap(Collection::stream).filter(state -> state.getPath().contains(extension)).count());
+assertEquals(numLogFiles, stats.stream().mapToLong(Collection::size).sum());
 //writeClient.commitCompaction(newCommitTime, statuses, Option.empty());
 // Trigger a rollback of compaction
 table.getActiveTimeline().reload();
@@ -862,14 +867,15 @@ public class TestHoodieSparkMergeOnReadTableRollback extends SparkClientFunction
 private long doCompaction(SparkRDDWriteClient client, HoodieTableMetaClient metaClient, HoodieWriteConfig cfg, long numLogFiles) throws IOException {
 // Do a compaction
 String instantTime = client.scheduleCompaction(Option.empty()).get().toString();
-JavaRDD<WriteStatus> writeStatuses = (JavaRDD<WriteStatus>) client.compact(instantTime);
+HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(instantTime);
 metaClient.reloadActiveTimeline();
 HoodieTable table = HoodieSparkTable.create(cfg, context(), metaClient);
 String extension = table.getBaseFileExtension();
-assertEquals(numLogFiles, writeStatuses.map(status -> status.getStat().getPath().contains(extension)).count());
-assertEquals(numLogFiles, writeStatuses.count());
-client.commitCompaction(instantTime, writeStatuses, Option.empty());
+Collection<List<HoodieWriteStat>> stats = compactionMetadata.getCommitMetadata().get().getPartitionToWriteStats().values();
+assertEquals(numLogFiles, stats.stream().flatMap(Collection::stream).filter(state -> state.getPath().contains(extension)).count());
+assertEquals(numLogFiles, stats.stream().mapToLong(Collection::size).sum());
+client.commitCompaction(instantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
 return numLogFiles;
 }

View File

@@ -35,10 +35,12 @@ import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 /**
  * All the metadata that gets stored along with a commit.
@@ -89,6 +91,10 @@ public class HoodieCommitMetadata implements Serializable {
 return partitionToWriteStats;
 }
+public List<HoodieWriteStat> getWriteStats() {
+return partitionToWriteStats.values().stream().flatMap(Collection::stream).collect(Collectors.toList());
+}
 public String getMetadata(String metaKey) {
 return extraMetadata.get(metaKey);
 }

View File

@@ -38,6 +38,8 @@ import org.apache.hudi.index.HoodieIndex;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hudi.table.action.HoodieWriteMetadata;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.SparkConf;
@@ -140,8 +142,8 @@ public class HoodieWriteClientExample {
 // compaction
 if (HoodieTableType.valueOf(tableType) == HoodieTableType.MERGE_ON_READ) {
 Option<String> instant = client.scheduleCompaction(Option.empty());
-JavaRDD<WriteStatus> writeStatues = client.compact(instant.get());
-client.commitCompaction(instant.get(), writeStatues, Option.empty());
+HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(instant.get());
+client.commitCompaction(instant.get(), compactionMetadata.getCommitMetadata().get(), Option.empty());
 }
 }

View File

@@ -20,12 +20,15 @@ package org.apache.hudi.sink.compact;
 import org.apache.hudi.avro.model.HoodieCompactionPlan;
 import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.data.HoodieList;
+import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.util.CompactionUtils;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.configuration.FlinkOptions;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.sink.CleanFunction;
 import org.apache.hudi.table.HoodieFlinkTable;
+import org.apache.hudi.table.action.compact.CompactHelpers;
 import org.apache.hudi.util.CompactionUtil;
 import org.apache.hudi.util.StreamerUtil;
@@ -147,8 +150,11 @@ public class CompactionCommitSink extends CleanFunction<CompactionCommitEvent> {
     .flatMap(Collection::stream)
     .collect(Collectors.toList());
+HoodieCommitMetadata metadata = CompactHelpers.getInstance().createCompactionMetadata(
+    table, instant, HoodieList.of(statuses), writeClient.getConfig().getSchema());
 // commit the compaction
-this.writeClient.commitCompaction(instant, statuses, Option.empty());
+this.writeClient.commitCompaction(instant, metadata, Option.empty());
 // Whether to clean up the old log file when compaction
 if (!conf.getBoolean(FlinkOptions.CLEAN_ASYNC_ENABLED)) {

View File

@@ -24,7 +24,9 @@ import org.apache.hudi.client.SparkRDDWriteClient;
 import org.apache.hudi.client.WriteStatus;
 import org.apache.hudi.client.common.HoodieSparkEngineContext;
 import org.apache.hudi.common.model.HoodieAvroRecord;
+import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.model.WriteOperationType;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.util.Option;
@@ -33,6 +35,7 @@ import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieIndexConfig;
 import org.apache.hudi.config.HoodiePayloadConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.data.HoodieJavaRDD;
 import org.apache.hudi.index.HoodieIndex;
 import org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig;
 import org.apache.hudi.integ.testsuite.dag.nodes.CleanNode;
@@ -40,6 +43,9 @@ import org.apache.hudi.integ.testsuite.dag.nodes.DagNode;
 import org.apache.hudi.integ.testsuite.dag.nodes.RollbackNode;
 import org.apache.hudi.integ.testsuite.dag.nodes.ScheduleCompactNode;
 import org.apache.hudi.integ.testsuite.writer.DeltaWriteStats;
+import org.apache.hudi.table.HoodieSparkTable;
+import org.apache.hudi.table.action.HoodieWriteMetadata;
+import org.apache.hudi.table.action.compact.CompactHelpers;
 import org.apache.hudi.utilities.schema.SchemaProvider;
 import org.apache.avro.Schema;
@@ -215,7 +221,8 @@ public class HoodieTestSuiteWriter implements Serializable {
 }
 }
 if (instantTime.isPresent()) {
-return (JavaRDD<WriteStatus>) writeClient.compact(instantTime.get());
+HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = writeClient.compact(instantTime.get());
+return compactionMetadata.getWriteStatuses();
 } else {
 return null;
 }
@@ -272,7 +279,9 @@ public class HoodieTestSuiteWriter implements Serializable {
 // Just stores the path where this batch of data is generated to
 extraMetadata.put(GENERATED_DATA_PATH, generatedDataStats.map(s -> s.getFilePath()).collect().get(0));
 }
-writeClient.commitCompaction(instantTime.get(), records, Option.of(extraMetadata));
+HoodieSparkTable<HoodieRecordPayload> table = HoodieSparkTable.create(writeClient.getConfig(), writeClient.getEngineContext());
+HoodieCommitMetadata metadata = CompactHelpers.getInstance().createCompactionMetadata(table, instantTime.get(), HoodieJavaRDD.of(records), writeClient.getConfig().getSchema());
+writeClient.commitCompaction(instantTime.get(), metadata, Option.of(extraMetadata));
 }
 }

View File

@@ -17,14 +17,13 @@
 package org.apache.spark.sql.hudi.command
-import org.apache.hudi.client.WriteStatus
-import org.apache.hudi.common.model.HoodieTableType
+import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieTableType}
 import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieTimeline}
 import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
 import org.apache.hudi.common.util.{HoodieTimer, Option => HOption}
 import org.apache.hudi.exception.HoodieException
 import org.apache.hudi.{DataSourceUtils, DataSourceWriteOptions, HoodieWriterUtils}
-import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
+import org.apache.spark.api.java.JavaSparkContext
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
 import org.apache.spark.sql.catalyst.plans.logical.CompactionOperation
 import org.apache.spark.sql.catalyst.plans.logical.CompactionOperation.{CompactionOperation, RUN, SCHEDULE}
@@ -100,8 +99,8 @@ case class CompactionHoodiePathCommand(path: String,
 timer.startTimer()
 willCompactionInstants.foreach {compactionInstant =>
 val writeResponse = client.compact(compactionInstant)
-handlerResponse(writeResponse)
-client.commitCompaction(compactionInstant, writeResponse, HOption.empty())
+handleResponse(writeResponse.getCommitMetadata.get())
+client.commitCompaction(compactionInstant, writeResponse.getCommitMetadata.get(), HOption.empty())
 }
 logInfo(s"Finish Run compaction at instants: [${willCompactionInstants.mkString(",")}]," +
     s" spend: ${timer.endTimer()}ms")
@@ -111,17 +110,13 @@ case class CompactionHoodiePathCommand(path: String,
 }
 }
-private def handlerResponse(writeResponse: JavaRDD[WriteStatus]): Unit = {
+private def handleResponse(metadata: HoodieCommitMetadata): Unit = {
 // Handle error
-val error = writeResponse.rdd.filter(f => f.hasErrors).take(1).headOption
-if (error.isDefined) {
-if (error.get.hasGlobalError) {
-throw error.get.getGlobalError
-} else if (!error.get.getErrors.isEmpty) {
-val key = error.get.getErrors.asScala.head._1
-val exception = error.get.getErrors.asScala.head._2
-throw new HoodieException(s"Error in write record: $key", exception)
-}
+val writeStats = metadata.getPartitionToWriteStats.entrySet().flatMap(e => e.getValue).toList
+val errorsCount = writeStats.map(state => state.getTotalWriteErrors).sum
+if (errorsCount > 0) {
+throw new HoodieException(s" Found $errorsCount when writing record")
 }
 }

View File

@@ -23,7 +23,6 @@ import org.apache.hudi.common.config.TypedProperties;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieRecordPayload;
-import org.apache.hudi.common.model.HoodieWriteStat;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.TableSchemaResolver;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -49,7 +48,6 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
-import java.util.stream.Collectors;
 public class HoodieClusteringJob {
@@ -216,7 +214,7 @@ public class HoodieClusteringJob {
 }
 Option<HoodieCommitMetadata> commitMetadata = client.cluster(cfg.clusteringInstantTime, true).getCommitMetadata();
-return handleErrors(commitMetadata.get(), cfg.clusteringInstantTime);
+return UtilHelpers.handleErrors(commitMetadata.get(), cfg.clusteringInstantTime);
 }
 }
@@ -271,20 +269,7 @@ public class HoodieClusteringJob {
 LOG.info("The schedule instant time is " + instantTime.get());
 LOG.info("Step 2: Do cluster");
 Option<HoodieCommitMetadata> metadata = client.cluster(instantTime.get(), true).getCommitMetadata();
-return handleErrors(metadata.get(), instantTime.get());
+return UtilHelpers.handleErrors(metadata.get(), instantTime.get());
 }
 }
-private int handleErrors(HoodieCommitMetadata metadata, String instantTime) {
-List<HoodieWriteStat> writeStats = metadata.getPartitionToWriteStats().entrySet().stream().flatMap(e ->
-    e.getValue().stream()).collect(Collectors.toList());
-long errorsCount = writeStats.stream().mapToLong(HoodieWriteStat::getTotalWriteErrors).sum();
-if (errorsCount == 0) {
-LOG.info(String.format("Table imported into hoodie with %s instant time.", instantTime));
-return 0;
-}
-LOG.error(String.format("Import failed with %d errors.", errorsCount));
-return -1;
-}
 }

View File

@@ -38,6 +38,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.table.action.HoodieWriteMetadata;
 import org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
@@ -266,8 +267,8 @@ public class HoodieCompactor {
 throw new HoodieCompactionException("There is no scheduled compaction in the table.");
 }
 }
-JavaRDD<WriteStatus> writeResponse = client.compact(cfg.compactionInstantTime);
-return UtilHelpers.handleErrors(jsc, cfg.compactionInstantTime, writeResponse);
+HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(cfg.compactionInstantTime);
+return UtilHelpers.handleErrors(compactionMetadata.getCommitMetadata().get(), cfg.compactionInstantTime);
 }
 }

View File

@@ -26,7 +26,9 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext;
 import org.apache.hudi.common.config.DFSPropertiesConfiguration;
 import org.apache.hudi.common.config.TypedProperties;
 import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.model.HoodieWriteStat;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.TableSchemaResolver;
 import org.apache.hudi.common.util.Functions.Function1;
@@ -303,6 +305,18 @@ public class UtilHelpers {
 return -1;
 }
+public static int handleErrors(HoodieCommitMetadata metadata, String instantTime) {
+List<HoodieWriteStat> writeStats = metadata.getWriteStats();
+long errorsCount = writeStats.stream().mapToLong(HoodieWriteStat::getTotalWriteErrors).sum();
+if (errorsCount == 0) {
+LOG.info(String.format("Finish job with %s instant time.", instantTime));
+return 0;
+}
+LOG.error(String.format("Job failed with %d errors.", errorsCount));
+return -1;
+}
 /**
  * Returns a factory for creating connections to the given JDBC URL.
  *