1
0

Avoid WriteStatus collect() call when committing batch

This commit is contained in:
Balaji Varadarajan
2018-11-27 23:21:34 -08:00
committed by vinoth chandar
parent fa65db9c4c
commit f999e4960c
8 changed files with 29 additions and 22 deletions

View File

@@ -29,6 +29,7 @@ import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.model.HoodieRollingStatMetadata;
import com.uber.hoodie.common.model.HoodieWriteStat;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
@@ -376,20 +377,19 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
/**
* Finalize the written data files
*
* @param writeStatuses List of WriteStatus
* @param stats List of HoodieWriteStats
* @return number of files finalized
*/
@Override
@SuppressWarnings("unchecked")
public void finalizeWrite(JavaSparkContext jsc, List<WriteStatus> writeStatuses)
public void finalizeWrite(JavaSparkContext jsc, List<HoodieWriteStat> stats)
throws HoodieIOException {
super.finalizeWrite(jsc, writeStatuses);
super.finalizeWrite(jsc, stats);
if (config.shouldUseTempFolderForCopyOnWrite()) {
// This is to rename each data file from temporary path to its final location
jsc.parallelize(writeStatuses, config.getFinalizeWriteParallelism())
.map(status -> status.getStat())
jsc.parallelize(stats, config.getFinalizeWriteParallelism())
.foreach(writeStat -> {
final FileSystem fs = getMetaClient().getFs();
final Path finalPath = new Path(config.getBasePath(), writeStat.getPath());