[HUDI-1434] fix incorrect log file path in HoodieWriteStat (#2300)
* [HUDI-1434] fix incorrect log file path in HoodieWriteStat
* HoodieWriteHandle#close() returns a list of WriteStatus objects
* Handle rolled-over log files and return a WriteStatus per log file written
- Combined data and delete block logging into a single call
- Lazily initialize and manage write status based on returned AppendResult
- Use FSUtils.getFileSize() to set final file size, consistent with other handles
- Added tests around returned values in AppendResult
- Added validation of the file sizes returned in write stat

Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
@@ -183,7 +183,7 @@ public class HoodieSparkCopyOnWriteTable<T extends HoodieRecordPayload> extends
|
||||
return handleUpdateInternal(upsertHandle, instantTime, fileId);
|
||||
}
|
||||
|
||||
protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle upsertHandle, String instantTime,
|
||||
protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle<?,?,?,?> upsertHandle, String instantTime,
|
||||
String fileId) throws IOException {
|
||||
if (upsertHandle.getOldFilePath() == null) {
|
||||
throw new HoodieUpsertException(
|
||||
@@ -193,11 +193,12 @@ public class HoodieSparkCopyOnWriteTable<T extends HoodieRecordPayload> extends
|
||||
}
|
||||
|
||||
// TODO(vc): This needs to be revisited
|
||||
if (upsertHandle.getWriteStatus().getPartitionPath() == null) {
|
||||
if (upsertHandle.getPartitionPath() == null) {
|
||||
LOG.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", "
|
||||
+ upsertHandle.getWriteStatus());
|
||||
+ upsertHandle.writeStatuses());
|
||||
}
|
||||
return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus())).iterator();
|
||||
|
||||
return Collections.singletonList(upsertHandle.writeStatuses()).iterator();
|
||||
}
|
||||
|
||||
protected HoodieMergeHandle getUpdateHandle(String instantTime, String partitionPath, String fileId,
|
||||
@@ -213,10 +214,10 @@ public class HoodieSparkCopyOnWriteTable<T extends HoodieRecordPayload> extends
|
||||
|
||||
public Iterator<List<WriteStatus>> handleInsert(String instantTime, String partitionPath, String fileId,
|
||||
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordMap) {
|
||||
HoodieCreateHandle createHandle =
|
||||
HoodieCreateHandle<?,?,?,?> createHandle =
|
||||
new HoodieCreateHandle(config, instantTime, this, partitionPath, fileId, recordMap, taskContextSupplier);
|
||||
createHandle.write();
|
||||
return Collections.singletonList(Collections.singletonList(createHandle.close())).iterator();
|
||||
return Collections.singletonList(createHandle.close()).iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -291,7 +291,7 @@ public class SparkBootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>
|
||||
HoodieFileStatus srcFileStatus, KeyGeneratorInterface keyGenerator) {
|
||||
|
||||
Path sourceFilePath = FileStatusUtils.toPath(srcFileStatus.getPath());
|
||||
HoodieBootstrapHandle bootstrapHandle = new HoodieBootstrapHandle(config, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS,
|
||||
HoodieBootstrapHandle<?,?,?,?> bootstrapHandle = new HoodieBootstrapHandle(config, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS,
|
||||
table, partitionPath, FSUtils.createNewFileIdPfx(), table.getTaskContextSupplier());
|
||||
Schema avroSchema = null;
|
||||
try {
|
||||
@@ -329,7 +329,8 @@ public class SparkBootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(e.getMessage(), e);
|
||||
}
|
||||
BootstrapWriteStatus writeStatus = (BootstrapWriteStatus)bootstrapHandle.getWriteStatus();
|
||||
|
||||
BootstrapWriteStatus writeStatus = (BootstrapWriteStatus) bootstrapHandle.writeStatuses().get(0);
|
||||
BootstrapFileMapping bootstrapFileMapping = new BootstrapFileMapping(
|
||||
config.getBootstrapSourceBasePath(), srcPartitionPath, partitionPath,
|
||||
srcFileStatus, writeStatus.getFileId());
|
||||
|
||||
@@ -299,7 +299,7 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
|
||||
return handleUpdateInternal(upsertHandle, fileId);
|
||||
}
|
||||
|
||||
protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle upsertHandle, String fileId)
|
||||
protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle<?,?,?,?> upsertHandle, String fileId)
|
||||
throws IOException {
|
||||
if (upsertHandle.getOldFilePath() == null) {
|
||||
throw new HoodieUpsertException(
|
||||
@@ -309,11 +309,12 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
|
||||
}
|
||||
|
||||
// TODO(vc): This needs to be revisited
|
||||
if (upsertHandle.getWriteStatus().getPartitionPath() == null) {
|
||||
if (upsertHandle.getPartitionPath() == null) {
|
||||
LOG.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", "
|
||||
+ upsertHandle.getWriteStatus());
|
||||
+ upsertHandle.writeStatuses());
|
||||
}
|
||||
return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus())).iterator();
|
||||
|
||||
return Collections.singletonList(upsertHandle.writeStatuses()).iterator();
|
||||
}
|
||||
|
||||
protected HoodieMergeHandle getUpdateHandle(String partitionPath, String fileId, Iterator<HoodieRecord<T>> recordItr) {
|
||||
|
||||
@@ -79,11 +79,10 @@ public abstract class AbstractSparkDeltaCommitActionExecutor<T extends HoodieRec
|
||||
LOG.info("Small file corrections for updates for commit " + instantTime + " for file " + fileId);
|
||||
return super.handleUpdate(partitionPath, fileId, recordItr);
|
||||
} else {
|
||||
HoodieAppendHandle appendHandle = new HoodieAppendHandle<>(config, instantTime, table,
|
||||
HoodieAppendHandle<?,?,?,?> appendHandle = new HoodieAppendHandle<>(config, instantTime, table,
|
||||
partitionPath, fileId, recordItr, taskContextSupplier);
|
||||
appendHandle.doAppend();
|
||||
appendHandle.close();
|
||||
return Collections.singletonList(Collections.singletonList(appendHandle.getWriteStatus())).iterator();
|
||||
return Collections.singletonList(appendHandle.close()).iterator();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -128,7 +128,7 @@ public class ListingBasedRollbackHelper implements Serializable {
|
||||
if (doDelete) {
|
||||
Map<HeaderMetadataType, String> header = generateHeader(instantToRollback.getTimestamp());
|
||||
// if update belongs to an existing log file
|
||||
writer = writer.appendBlock(new HoodieCommandBlock(header));
|
||||
writer.appendBlock(new HoodieCommandBlock(header));
|
||||
}
|
||||
} catch (IOException | InterruptedException io) {
|
||||
throw new HoodieRollbackException("Failed to rollback for instant " + instantToRollback, io);
|
||||
|
||||
Reference in New Issue
Block a user