1
0

[HUDI-4353] Column stats data skipping for Flink (#6026)

This commit is contained in:
Danny Chan
2022-07-03 08:29:31 +08:00
committed by GitHub
parent bdf73b2650
commit 47792a3186
30 changed files with 1930 additions and 81 deletions

View File

@@ -74,6 +74,7 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -159,10 +160,14 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
initTable(WriteOperationType.UPSERT, Option.ofNullable(instantTime));
table.validateUpsertSchema();
preWrite(instantTime, WriteOperationType.UPSERT_PREPPED, table.getMetaClient());
final HoodieWriteHandle<?, ?, ?, ?> writeHandle = getOrCreateWriteHandle(preppedRecords.get(0), getConfig(),
instantTime, table, preppedRecords.listIterator());
HoodieWriteMetadata<List<WriteStatus>> result = ((HoodieFlinkTable<T>) table).upsertPrepped(context, writeHandle, instantTime, preppedRecords);
return postWrite(result, instantTime, table);
Map<String, List<HoodieRecord<T>>> preppedRecordsByFileId = preppedRecords.stream().parallel()
.collect(Collectors.groupingBy(r -> r.getCurrentLocation().getFileId()));
return preppedRecordsByFileId.values().stream().parallel().map(records -> {
final HoodieWriteHandle<?, ?, ?, ?> writeHandle = getOrCreateWriteHandle(records.get(0), getConfig(),
instantTime, table, records.listIterator());
HoodieWriteMetadata<List<WriteStatus>> result = ((HoodieFlinkTable<T>) table).upsertPrepped(context, writeHandle, instantTime, records);
return postWrite(result, instantTime, table);
}).flatMap(Collection::stream).collect(Collectors.toList());
}
@Override