[HUDI-1571] Adding commit_show_records_info to display record sizes for commit (#2514)
This commit is contained in:
committed by
GitHub
parent
b51b3a39a8
commit
b5d4a046bb
@@ -126,6 +126,9 @@ public class HoodieTableHeaderFields {
|
||||
// Table header label strings.
public static final String HEADER_TOTAL_RECORDS_INSERTED = "Total Records Inserted";
|
||||
public static final String HEADER_TOTAL_RECORDS_UPDATED = "Total Records Updated";
|
||||
public static final String HEADER_TOTAL_ERRORS = "Total Errors";
|
||||
// Header labels for the commit-wide totals table built by the "commit show_write_stats" command.
public static final String HEADER_TOTAL_RECORDS_WRITTEN_COMMIT = "Total Records Written for entire commit";
|
||||
public static final String HEADER_TOTAL_BYTES_WRITTEN_COMMIT = "Total Bytes Written for entire commit";
|
||||
public static final String HEADER_AVG_REC_SIZE_COMMIT = "Avg record size for entire commit";
|
||||
|
||||
/**
|
||||
* Fields of commit metadata.
|
||||
|
||||
@@ -314,6 +314,45 @@ public class CommitsCommand implements CommandMarker {
|
||||
limit, headerOnly, rows, exportTableName);
|
||||
}
|
||||
|
||||
@CliCommand(value = "commit show_write_stats", help = "Show write stats of a commit")
|
||||
public String showWriteStats(
|
||||
@CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",
|
||||
unspecifiedDefaultValue = "") final String exportTableName,
|
||||
@CliOption(key = {"commit"}, help = "Commit to show") final String instantTime,
|
||||
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
|
||||
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
|
||||
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
|
||||
@CliOption(key = {"headeronly"}, help = "Print Header Only",
|
||||
unspecifiedDefaultValue = "false") final boolean headerOnly)
|
||||
throws Exception {
|
||||
|
||||
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
|
||||
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
|
||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
|
||||
|
||||
if (!timeline.containsInstant(commitInstant)) {
|
||||
return "Commit " + instantTime + " not found in Commits " + timeline;
|
||||
}
|
||||
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get(),
|
||||
HoodieCommitMetadata.class);
|
||||
long recordsWritten = meta.fetchTotalRecordsWritten();
|
||||
long bytesWritten = meta.fetchTotalBytesWritten();
|
||||
long avgRecSize = (long) Math.ceil((1.0 * bytesWritten) / recordsWritten);
|
||||
List<Comparable[]> rows = new ArrayList<>();
|
||||
rows.add(new Comparable[] {bytesWritten, recordsWritten, avgRecSize});
|
||||
|
||||
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
|
||||
fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN, entry ->
|
||||
NumericUtils.humanReadableByteCount((Long.parseLong(entry.toString()))));
|
||||
|
||||
TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN_COMMIT)
|
||||
.addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_WRITTEN_COMMIT)
|
||||
.addTableHeaderField(HoodieTableHeaderFields.HEADER_AVG_REC_SIZE_COMMIT);
|
||||
|
||||
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending,
|
||||
limit, headerOnly, rows, exportTableName);
|
||||
}
|
||||
|
||||
@CliCommand(value = "commit showfiles", help = "Show file level details of a commit")
|
||||
public String showCommitFiles(
|
||||
@CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",
|
||||
|
||||
Reference in New Issue
Block a user