[HUDI-4353] Column stats data skipping for flink (#6026)
This commit is contained in:
@@ -74,6 +74,7 @@ import org.slf4j.LoggerFactory;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.ParseException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
@@ -159,10 +160,14 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
|
||||
initTable(WriteOperationType.UPSERT, Option.ofNullable(instantTime));
|
||||
table.validateUpsertSchema();
|
||||
preWrite(instantTime, WriteOperationType.UPSERT_PREPPED, table.getMetaClient());
|
||||
final HoodieWriteHandle<?, ?, ?, ?> writeHandle = getOrCreateWriteHandle(preppedRecords.get(0), getConfig(),
|
||||
instantTime, table, preppedRecords.listIterator());
|
||||
HoodieWriteMetadata<List<WriteStatus>> result = ((HoodieFlinkTable<T>) table).upsertPrepped(context, writeHandle, instantTime, preppedRecords);
|
||||
return postWrite(result, instantTime, table);
|
||||
Map<String, List<HoodieRecord<T>>> preppedRecordsByFileId = preppedRecords.stream().parallel()
|
||||
.collect(Collectors.groupingBy(r -> r.getCurrentLocation().getFileId()));
|
||||
return preppedRecordsByFileId.values().stream().parallel().map(records -> {
|
||||
final HoodieWriteHandle<?, ?, ?, ?> writeHandle = getOrCreateWriteHandle(records.get(0), getConfig(),
|
||||
instantTime, table, records.listIterator());
|
||||
HoodieWriteMetadata<List<WriteStatus>> result = ((HoodieFlinkTable<T>) table).upsertPrepped(context, writeHandle, instantTime, records);
|
||||
return postWrite(result, instantTime, table);
|
||||
}).flatMap(Collection::stream).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
Reference in New Issue
Block a user