[HUDI-1705] Flush as per data bucket for mini-batch write (#2695)
Detects the buffer size of each data bucket before flushing, so that we avoid flushing data buckets that hold only a few records.
This commit is contained in:
@@ -399,6 +399,7 @@ public class TestWriteCopyOnWrite {
|
||||
|
||||
// this triggers the data write and event send
|
||||
funcWrapper.checkpointFunction(1);
|
||||
dataBuffer = funcWrapper.getDataBuffer();
|
||||
assertThat("All data should be flushed out", dataBuffer.size(), is(0));
|
||||
|
||||
final OperatorEvent event1 = funcWrapper.getNextEvent(); // remove the first event first
|
||||
@@ -430,10 +431,8 @@ public class TestWriteCopyOnWrite {
|
||||
|
||||
final OperatorEvent event3 = funcWrapper.getNextEvent(); // remove the first event first
|
||||
final OperatorEvent event4 = funcWrapper.getNextEvent();
|
||||
final OperatorEvent event5 = funcWrapper.getNextEvent();
|
||||
funcWrapper.getCoordinator().handleEventFromOperator(0, event3);
|
||||
funcWrapper.getCoordinator().handleEventFromOperator(0, event4);
|
||||
funcWrapper.getCoordinator().handleEventFromOperator(0, event5);
|
||||
funcWrapper.checkpointComplete(2);
|
||||
|
||||
// Same as the original base file content.
|
||||
|
||||
@@ -137,7 +137,7 @@ public class StreamWriteFunctionWrapper<I> {
|
||||
}
|
||||
|
||||
public Map<String, List<HoodieRecord>> getDataBuffer() {
|
||||
return this.writeFunction.getBuffer();
|
||||
return this.writeFunction.getDataBuffer();
|
||||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
|
||||
@@ -33,11 +33,11 @@ import org.apache.flink.table.sources.StreamTableSource;
|
||||
import org.apache.flink.table.types.DataType;
|
||||
import org.apache.flink.table.types.logical.RowType;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
@@ -151,15 +151,9 @@ public class ContinuousFileSource implements StreamTableSource<RowData> {
|
||||
}
|
||||
|
||||
private void loadDataBuffer() {
|
||||
this.dataBuffer = new ArrayList<>();
|
||||
try (BufferedReader reader =
|
||||
new BufferedReader(new FileReader(this.path.toString()))) {
|
||||
String line = reader.readLine();
|
||||
while (line != null) {
|
||||
this.dataBuffer.add(line);
|
||||
// read next line
|
||||
line = reader.readLine();
|
||||
}
|
||||
try {
|
||||
new File(this.path.toString()).exists();
|
||||
this.dataBuffer = Files.readAllLines(Paths.get(this.path.toUri()));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Read file " + this.path + " error", e);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user