[HUDI-2849] Improve SparkUI job description for write path (#4222)
This commit is contained in:
@@ -265,6 +265,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
||||
* @param metadata instance of {@link HoodieCommitMetadata}.
|
||||
*/
|
||||
protected void writeTableMetadata(HoodieTable table, String instantTime, String actionType, HoodieCommitMetadata metadata) {
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Committing to metadata table");
|
||||
table.getMetadataWriter(instantTime).ifPresent(w -> ((HoodieTableMetadataWriter) w).update(metadata, instantTime,
|
||||
table.isTableServiceAction(actionType)));
|
||||
}
|
||||
|
||||
@@ -464,6 +464,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
|
||||
|
||||
// List all partitions in the basePath of the containing dataset
|
||||
LOG.info("Initializing metadata table by using file listings in " + dataWriteConfig.getBasePath());
|
||||
engineContext.setJobStatus(this.getClass().getSimpleName(), "Bootstrap: initializing metadata table by listing files and partitions");
|
||||
List<DirectoryInfo> dirInfoList = listAllPartitions(dataMetaClient);
|
||||
|
||||
// During bootstrap, the list of files to be committed can be huge. So creating a HoodieCommitMetadata out of these
|
||||
|
||||
@@ -72,6 +72,7 @@ public class CleanPlanActionExecutor<T extends HoodieRecordPayload, I, K, O> ext
|
||||
try {
|
||||
CleanPlanner<T, I, K, O> planner = new CleanPlanner<>(context, table, config);
|
||||
Option<HoodieInstant> earliestInstant = planner.getEarliestCommitToRetain();
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Obtaining list of partitions to be cleaned");
|
||||
List<String> partitionsToClean = planner.getPartitionPathsToClean(earliestInstant);
|
||||
|
||||
if (partitionsToClean.isEmpty()) {
|
||||
@@ -82,7 +83,7 @@ public class CleanPlanActionExecutor<T extends HoodieRecordPayload, I, K, O> ext
|
||||
int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
|
||||
LOG.info("Using cleanerParallelism: " + cleanerParallelism);
|
||||
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Generates list of file slices to be cleaned");
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Generating list of file slices to be cleaned");
|
||||
|
||||
Map<String, List<HoodieCleanFileInfo>> cleanOps = context
|
||||
.map(partitionsToClean, partitionPathToClean -> Pair.of(partitionPathToClean, planner.getDeletePaths(partitionPathToClean)), cleanerParallelism)
|
||||
|
||||
@@ -48,6 +48,7 @@ public abstract class AbstractWriteHelper<T extends HoodieRecordPayload, I, K, O
|
||||
I taggedRecords = dedupedRecords;
|
||||
if (performTagging) {
|
||||
// perform index loop up to get existing location of records
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Tagging");
|
||||
taggedRecords = tag(dedupedRecords, context, table);
|
||||
}
|
||||
Duration indexLookupDuration = Duration.between(lookupBegin, Instant.now());
|
||||
|
||||
@@ -118,6 +118,7 @@ public class ScheduleCompactionActionExecutor<T extends HoodieRecordPayload, I,
|
||||
.collect(Collectors.toSet());
|
||||
// exclude files in pending clustering from compaction.
|
||||
fgInPendingCompactionAndClustering.addAll(fileSystemView.getFileGroupsInPendingClustering().map(Pair::getLeft).collect(Collectors.toSet()));
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Compaction: generating compaction plan");
|
||||
return compactor.generateCompactionPlan(context, table, config, instantTime, fgInPendingCompactionAndClustering);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieCompactionException("Could not schedule compaction " + config.getBasePath(), e);
|
||||
|
||||
@@ -85,6 +85,7 @@ public abstract class WriteMarkers implements Serializable {
|
||||
*/
|
||||
public void quietDeleteMarkerDir(HoodieEngineContext context, int parallelism) {
|
||||
try {
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Deleting marker directory");
|
||||
deleteMarkerDir(context, parallelism);
|
||||
} catch (HoodieIOException ioe) {
|
||||
LOG.warn("Error deleting marker directory for instant " + instantTime, ioe);
|
||||
|
||||
@@ -120,6 +120,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
@Override
|
||||
public boolean commit(String instantTime, JavaRDD<WriteStatus> writeStatuses, Option<Map<String, String>> extraMetadata,
|
||||
String commitActionType, Map<String, List<String>> partitionToReplacedFileIds) {
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Committing stats");
|
||||
List<HoodieWriteStat> writeStats = writeStatuses.map(WriteStatus::getStat).collect();
|
||||
return commitStats(instantTime, writeStats, extraMetadata, commitActionType, partitionToReplacedFileIds);
|
||||
}
|
||||
|
||||
@@ -115,6 +115,7 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
|
||||
}
|
||||
|
||||
private JavaRDD<HoodieRecord<T>> clusteringHandleUpdate(JavaRDD<HoodieRecord<T>> inputRecordsRDD) {
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Handling updates which are under clustering");
|
||||
Set<HoodieFileGroupId> fileGroupsInPendingClustering =
|
||||
table.getFileSystemView().getFileGroupsInPendingClustering().map(entry -> entry.getKey()).collect(Collectors.toSet());
|
||||
UpdateStrategy updateStrategy = (UpdateStrategy) ReflectionUtils
|
||||
@@ -166,6 +167,7 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
|
||||
|
||||
// partition using the insert partitioner
|
||||
final Partitioner partitioner = getPartitioner(profile);
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Doing partition and writing data");
|
||||
JavaRDD<HoodieRecord<T>> partitionedRecords = partition(inputRecordsRDDWithClusteringUpdate, partitioner);
|
||||
JavaRDD<WriteStatus> writeStatusRDD = partitionedRecords.mapPartitionsWithIndex((partition, recordItr) -> {
|
||||
if (WriteOperationType.isChangingRecords(operationType)) {
|
||||
@@ -276,7 +278,7 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
|
||||
|
||||
protected void commit(Option<Map<String, String>> extraMetadata, HoodieWriteMetadata<JavaRDD<WriteStatus>> result, List<HoodieWriteStat> writeStats) {
|
||||
String actionType = getCommitActionType();
|
||||
LOG.info("Committing " + instantTime + ", action Type " + actionType);
|
||||
LOG.info("Committing " + instantTime + ", action Type " + actionType + ", operation Type " + operationType);
|
||||
result.setCommitted(true);
|
||||
result.setWriteStats(writeStats);
|
||||
// Finalize write
|
||||
|
||||
@@ -441,6 +441,7 @@ public class DeltaSync implements Serializable {
|
||||
return null;
|
||||
}
|
||||
|
||||
jssc.setJobGroup(this.getClass().getSimpleName(), "Checking if input is empty");
|
||||
if ((!avroRDDOptional.isPresent()) || (avroRDDOptional.get().isEmpty())) {
|
||||
LOG.info("No new data, perform empty commit.");
|
||||
return Pair.of(schemaProvider, Pair.of(checkpointStr, jssc.emptyRDD()));
|
||||
|
||||
Reference in New Issue
Block a user