1
0

[HUDI-1571] Adding commit_show_records_info to display record sizes for commit (#2514)

This commit is contained in:
Sivabalan Narayanan
2021-02-05 07:53:24 -05:00
committed by GitHub
parent b51b3a39a8
commit b5d4a046bb
2 changed files with 42 additions and 0 deletions

View File

@@ -126,6 +126,9 @@ public class HoodieTableHeaderFields {
public static final String HEADER_TOTAL_RECORDS_INSERTED = "Total Records Inserted";
public static final String HEADER_TOTAL_RECORDS_UPDATED = "Total Records Updated";
public static final String HEADER_TOTAL_ERRORS = "Total Errors";
public static final String HEADER_TOTAL_RECORDS_WRITTEN_COMMIT = "Total Records Written for entire commit";
public static final String HEADER_TOTAL_BYTES_WRITTEN_COMMIT = "Total Bytes Written for entire commit";
public static final String HEADER_AVG_REC_SIZE_COMMIT = "Avg record size for entire commit";
/**
* Fields of commit metadata.

View File

@@ -314,6 +314,45 @@ public class CommitsCommand implements CommandMarker {
limit, headerOnly, rows, exportTableName);
}
@CliCommand(value = "commit show_write_stats", help = "Show write stats of a commit")
public String showWriteStats(
@CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",
unspecifiedDefaultValue = "") final String exportTableName,
@CliOption(key = {"commit"}, help = "Commit to show") final String instantTime,
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@CliOption(key = {"headeronly"}, help = "Print Header Only",
unspecifiedDefaultValue = "false") final boolean headerOnly)
throws Exception {
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
if (!timeline.containsInstant(commitInstant)) {
return "Commit " + instantTime + " not found in Commits " + timeline;
}
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get(),
HoodieCommitMetadata.class);
long recordsWritten = meta.fetchTotalRecordsWritten();
long bytesWritten = meta.fetchTotalBytesWritten();
long avgRecSize = (long) Math.ceil((1.0 * bytesWritten) / recordsWritten);
List<Comparable[]> rows = new ArrayList<>();
rows.add(new Comparable[] {bytesWritten, recordsWritten, avgRecSize});
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN, entry ->
NumericUtils.humanReadableByteCount((Long.parseLong(entry.toString()))));
TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN_COMMIT)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_WRITTEN_COMMIT)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_AVG_REC_SIZE_COMMIT);
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending,
limit, headerOnly, rows, exportTableName);
}
@CliCommand(value = "commit showfiles", help = "Show file level details of a commit")
public String showCommitFiles(
@CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",