1
0

[HUDI-1705] Flush as per data bucket for mini-batch write (#2695)

Detect the buffer size of each data bucket before flushing, so that we
avoid flushing data buckets that hold only a few records.
This commit is contained in:
Danny Chan
2021-03-19 16:30:54 +08:00
committed by GitHub
parent 1277c62398
commit f74828fca1
4 changed files with 88 additions and 47 deletions

View File

@@ -399,6 +399,7 @@ public class TestWriteCopyOnWrite {
// this triggers the data write and event send
funcWrapper.checkpointFunction(1);
dataBuffer = funcWrapper.getDataBuffer();
assertThat("All data should be flushed out", dataBuffer.size(), is(0));
final OperatorEvent event1 = funcWrapper.getNextEvent(); // remove the first event first
@@ -430,10 +431,8 @@ public class TestWriteCopyOnWrite {
final OperatorEvent event3 = funcWrapper.getNextEvent(); // remove the first event first
final OperatorEvent event4 = funcWrapper.getNextEvent();
final OperatorEvent event5 = funcWrapper.getNextEvent();
funcWrapper.getCoordinator().handleEventFromOperator(0, event3);
funcWrapper.getCoordinator().handleEventFromOperator(0, event4);
funcWrapper.getCoordinator().handleEventFromOperator(0, event5);
funcWrapper.checkpointComplete(2);
// Same as the original base file content.

View File

@@ -137,7 +137,7 @@ public class StreamWriteFunctionWrapper<I> {
}
public Map<String, List<HoodieRecord>> getDataBuffer() {
return this.writeFunction.getBuffer();
return this.writeFunction.getDataBuffer();
}
@SuppressWarnings("rawtypes")

View File

@@ -33,11 +33,11 @@ import org.apache.flink.table.sources.StreamTableSource;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.RowType;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
@@ -151,15 +151,9 @@ public class ContinuousFileSource implements StreamTableSource<RowData> {
}
private void loadDataBuffer() {
this.dataBuffer = new ArrayList<>();
try (BufferedReader reader =
new BufferedReader(new FileReader(this.path.toString()))) {
String line = reader.readLine();
while (line != null) {
this.dataBuffer.add(line);
// read next line
line = reader.readLine();
}
try {
new File(this.path.toString()).exists();
this.dataBuffer = Files.readAllLines(Paths.get(this.path.toUri()));
} catch (IOException e) {
throw new RuntimeException("Read file " + this.path + " error", e);
}