[minor] Checks the data block type for archived timeline (#5106)
This commit is contained in:
@@ -27,6 +27,7 @@ import org.apache.hudi.common.model.HoodiePartitionMetadata;
|
|||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
import org.apache.hudi.common.table.log.HoodieLogFormat;
|
import org.apache.hudi.common.table.log.HoodieLogFormat;
|
||||||
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
|
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
|
||||||
|
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
|
||||||
import org.apache.hudi.common.util.ClosableIterator;
|
import org.apache.hudi.common.util.ClosableIterator;
|
||||||
import org.apache.hudi.common.util.CollectionUtils;
|
import org.apache.hudi.common.util.CollectionUtils;
|
||||||
import org.apache.hudi.common.util.FileIOUtils;
|
import org.apache.hudi.common.util.FileIOUtils;
|
||||||
@@ -248,16 +249,19 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
|
|||||||
int instantsInPreviousFile = instantsInRange.size();
|
int instantsInPreviousFile = instantsInRange.size();
|
||||||
// Read the avro blocks
|
// Read the avro blocks
|
||||||
while (reader.hasNext()) {
|
while (reader.hasNext()) {
|
||||||
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
|
HoodieLogBlock block = reader.next();
|
||||||
// TODO If we can store additional metadata in datablock, we can skip parsing records
|
if (block instanceof HoodieAvroDataBlock) {
|
||||||
// (such as startTime, endTime of records in the block)
|
HoodieAvroDataBlock avroBlock = (HoodieAvroDataBlock) block;
|
||||||
try (ClosableIterator<IndexedRecord> itr = blk.getRecordItr()) {
|
// TODO If we can store additional metadata in datablock, we can skip parsing records
|
||||||
StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true)
|
// (such as startTime, endTime of records in the block)
|
||||||
// Filter blocks in desired time window
|
try (ClosableIterator<IndexedRecord> itr = avroBlock.getRecordItr()) {
|
||||||
.filter(r -> commitsFilter.apply((GenericRecord) r))
|
StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true)
|
||||||
.map(r -> readCommit((GenericRecord) r, loadInstantDetails))
|
// Filter blocks in desired time window
|
||||||
.filter(c -> filter == null || filter.isInRange(c))
|
.filter(r -> commitsFilter.apply((GenericRecord) r))
|
||||||
.forEach(instantsInRange::add);
|
.map(r -> readCommit((GenericRecord) r, loadInstantDetails))
|
||||||
|
.filter(c -> filter == null || filter.isInRange(c))
|
||||||
|
.forEach(instantsInRange::add);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -30,7 +30,6 @@ import org.apache.hudi.common.util.ValidationUtils;
|
|||||||
import org.apache.hudi.configuration.FlinkOptions;
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
import org.apache.hudi.configuration.OptionsResolver;
|
import org.apache.hudi.configuration.OptionsResolver;
|
||||||
import org.apache.hudi.exception.HoodieException;
|
import org.apache.hudi.exception.HoodieException;
|
||||||
import org.apache.hudi.sink.event.CommitAckEvent;
|
|
||||||
import org.apache.hudi.sink.event.WriteMetadataEvent;
|
import org.apache.hudi.sink.event.WriteMetadataEvent;
|
||||||
import org.apache.hudi.sink.meta.CkpMetadata;
|
import org.apache.hudi.sink.meta.CkpMetadata;
|
||||||
import org.apache.hudi.sink.utils.HiveSyncContext;
|
import org.apache.hudi.sink.utils.HiveSyncContext;
|
||||||
@@ -43,7 +42,6 @@ import org.apache.flink.configuration.Configuration;
|
|||||||
import org.apache.flink.runtime.jobgraph.OperatorID;
|
import org.apache.flink.runtime.jobgraph.OperatorID;
|
||||||
import org.apache.flink.runtime.operators.coordination.OperatorCoordinator;
|
import org.apache.flink.runtime.operators.coordination.OperatorCoordinator;
|
||||||
import org.apache.flink.runtime.operators.coordination.OperatorEvent;
|
import org.apache.flink.runtime.operators.coordination.OperatorEvent;
|
||||||
import org.apache.flink.runtime.operators.coordination.TaskNotRunningException;
|
|
||||||
import org.jetbrains.annotations.Nullable;
|
import org.jetbrains.annotations.Nullable;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@@ -431,31 +429,6 @@ public class StreamWriteOperatorCoordinator
|
|||||||
addEventToBuffer(event);
|
addEventToBuffer(event);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* The coordinator reuses the instant if there is no data for this round of checkpoint,
|
|
||||||
* sends the commit ack events to unblock the flushing.
|
|
||||||
*/
|
|
||||||
private void sendCommitAckEvents(long checkpointId) {
|
|
||||||
CompletableFuture<?>[] futures = Arrays.stream(this.gateways).filter(Objects::nonNull)
|
|
||||||
.map(gw -> gw.sendEvent(CommitAckEvent.getInstance(checkpointId)))
|
|
||||||
.toArray(CompletableFuture<?>[]::new);
|
|
||||||
CompletableFuture.allOf(futures).whenComplete((resp, error) -> {
|
|
||||||
if (!sendToFinishedTasks(error)) {
|
|
||||||
throw new HoodieException("Error while waiting for the commit ack events to finish sending", error);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Decides whether the given exception is caused by sending events to FINISHED tasks.
|
|
||||||
*
|
|
||||||
* <p>Ugly impl: the exception may change in the future.
|
|
||||||
*/
|
|
||||||
private static boolean sendToFinishedTasks(Throwable throwable) {
|
|
||||||
return throwable.getCause() instanceof TaskNotRunningException
|
|
||||||
|| throwable.getCause().getMessage().contains("running");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Commits the instant.
|
* Commits the instant.
|
||||||
*/
|
*/
|
||||||
@@ -483,8 +456,6 @@ public class StreamWriteOperatorCoordinator
|
|||||||
if (writeResults.size() == 0) {
|
if (writeResults.size() == 0) {
|
||||||
// No data has written, reset the buffer and returns early
|
// No data has written, reset the buffer and returns early
|
||||||
reset();
|
reset();
|
||||||
// Send commit ack event to the write function to unblock the flushing
|
|
||||||
sendCommitAckEvents(checkpointId);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
doCommit(instant, writeResults);
|
doCommit(instant, writeResults);
|
||||||
|
|||||||
Reference in New Issue
Block a user