[HUDI-1771] Propagate CDC format for hoodie (#3285)
This commit is contained in:
@@ -38,9 +38,9 @@ import org.apache.hudi.sink.compact.CompactionPlanSourceFunction;
|
||||
import org.apache.hudi.sink.compact.FlinkCompactionConfig;
|
||||
import org.apache.hudi.sink.partitioner.BucketAssignFunction;
|
||||
import org.apache.hudi.sink.partitioner.BucketAssignOperator;
|
||||
import org.apache.hudi.sink.transform.ChainedTransformer;
|
||||
import org.apache.hudi.sink.transform.RowDataToHoodieFunction;
|
||||
import org.apache.hudi.sink.transform.Transformer;
|
||||
import org.apache.hudi.sink.transform.ChainedTransformer;
|
||||
import org.apache.hudi.table.HoodieFlinkTable;
|
||||
import org.apache.hudi.util.AvroSchemaConverter;
|
||||
import org.apache.hudi.util.CompactionUtil;
|
||||
@@ -85,6 +85,8 @@ import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Integration test for Flink Hoodie stream sink.
|
||||
*/
|
||||
@@ -200,7 +202,9 @@ public class StreamWriteITCase extends TestLogger {
|
||||
// To compute the compaction instant time and do compaction.
|
||||
String compactionInstantTime = CompactionUtil.getCompactionInstantTime(metaClient);
|
||||
HoodieFlinkWriteClient writeClient = StreamerUtil.createWriteClient(conf, null);
|
||||
writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
|
||||
boolean scheduled = writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
|
||||
|
||||
assertTrue(scheduled, "The compaction plan should be scheduled");
|
||||
|
||||
HoodieFlinkTable<?> table = writeClient.getHoodieTable();
|
||||
// generate compaction plan
|
||||
@@ -209,8 +213,10 @@ public class StreamWriteITCase extends TestLogger {
|
||||
table.getMetaClient(), compactionInstantTime);
|
||||
|
||||
HoodieInstant instant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
|
||||
// Mark instant as compaction inflight
|
||||
table.getActiveTimeline().transitionCompactionRequestedToInflight(instant);
|
||||
|
||||
env.addSource(new CompactionPlanSourceFunction(table, instant, compactionPlan, compactionInstantTime))
|
||||
env.addSource(new CompactionPlanSourceFunction(compactionPlan, compactionInstantTime))
|
||||
.name("compaction_source")
|
||||
.uid("uid_compaction_source")
|
||||
.rebalance()
|
||||
|
||||
@@ -399,33 +399,29 @@ public class TestWriteCopyOnWrite {
|
||||
// the coordinator checkpoint commits the inflight instant.
|
||||
checkInstantState(funcWrapper.getWriteClient(), HoodieInstant.State.COMPLETED, instant);
|
||||
|
||||
Map<String, String> expected = new HashMap<>();
|
||||
// id3, id5 were deleted and id9 is ignored
|
||||
expected.put("par1", "[id1,par1,id1,Danny,24,1,par1, id2,par1,id2,Stephen,34,2,par1]");
|
||||
expected.put("par2", "[id4,par2,id4,Fabian,31,4,par2]");
|
||||
expected.put("par3", "[id6,par3,id6,Emma,20,6,par3]");
|
||||
expected.put("par4", "[id7,par4,id7,Bob,44,7,par4, id8,par4,id8,Han,56,8,par4]");
|
||||
Map<String, String> expected = getUpsertWithDeleteExpected();
|
||||
checkWrittenData(tempFile, expected);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInsertWithMiniBatches() throws Exception {
|
||||
// reset the config option
|
||||
conf.setDouble(FlinkOptions.WRITE_BATCH_SIZE, 0.0006); // 630 bytes batch size
|
||||
conf.setDouble(FlinkOptions.WRITE_BATCH_SIZE, 0.0008); // 839 bytes batch size
|
||||
funcWrapper = new StreamWriteFunctionWrapper<>(tempFile.getAbsolutePath(), conf);
|
||||
|
||||
// open the function and ingest data
|
||||
funcWrapper.openFunction();
|
||||
// Each record is 208 bytes. so 4 records expect to trigger a mini-batch write
|
||||
// record (operation: 'I') is 304 bytes and record (operation: 'U') is 352 bytes.
|
||||
// so 3 records expect to trigger a mini-batch write
|
||||
for (RowData rowData : TestData.DATA_SET_INSERT_DUPLICATES) {
|
||||
funcWrapper.invoke(rowData);
|
||||
}
|
||||
|
||||
Map<String, List<HoodieRecord>> dataBuffer = funcWrapper.getDataBuffer();
|
||||
assertThat("Should have 1 data bucket", dataBuffer.size(), is(1));
|
||||
assertThat("2 records expect to flush out as a mini-batch",
|
||||
assertThat("3 records expect to flush out as a mini-batch",
|
||||
dataBuffer.values().stream().findFirst().map(List::size).orElse(-1),
|
||||
is(2));
|
||||
is(3));
|
||||
|
||||
// this triggers the data write and event send
|
||||
funcWrapper.checkpointFunction(1);
|
||||
@@ -472,22 +468,23 @@ public class TestWriteCopyOnWrite {
|
||||
@Test
|
||||
public void testInsertWithDeduplication() throws Exception {
|
||||
// reset the config option
|
||||
conf.setDouble(FlinkOptions.WRITE_BATCH_SIZE, 0.0006); // 630 bytes batch size
|
||||
conf.setDouble(FlinkOptions.WRITE_BATCH_SIZE, 0.0008); // 839 bytes batch size
|
||||
conf.setBoolean(FlinkOptions.INSERT_DROP_DUPS, true);
|
||||
funcWrapper = new StreamWriteFunctionWrapper<>(tempFile.getAbsolutePath(), conf);
|
||||
|
||||
// open the function and ingest data
|
||||
funcWrapper.openFunction();
|
||||
// Each record is 208 bytes. so 4 records expect to trigger a mini-batch write
|
||||
// record (operation: 'I') is 304 bytes and record (operation: 'U') is 352 bytes.
|
||||
// so 3 records expect to trigger a mini-batch write
|
||||
for (RowData rowData : TestData.DATA_SET_INSERT_SAME_KEY) {
|
||||
funcWrapper.invoke(rowData);
|
||||
}
|
||||
|
||||
Map<String, List<HoodieRecord>> dataBuffer = funcWrapper.getDataBuffer();
|
||||
assertThat("Should have 1 data bucket", dataBuffer.size(), is(1));
|
||||
assertThat("2 records expect to flush out as a mini-batch",
|
||||
assertThat("3 records expect to flush out as a mini-batch",
|
||||
dataBuffer.values().stream().findFirst().map(List::size).orElse(-1),
|
||||
is(2));
|
||||
is(3));
|
||||
|
||||
// this triggers the data write and event send
|
||||
funcWrapper.checkpointFunction(1);
|
||||
@@ -612,12 +609,13 @@ public class TestWriteCopyOnWrite {
|
||||
@Test
|
||||
public void testInsertWithSmallBufferSize() throws Exception {
|
||||
// reset the config option
|
||||
conf.setDouble(FlinkOptions.WRITE_TASK_MAX_SIZE, 200.0006); // 630 bytes buffer size
|
||||
conf.setDouble(FlinkOptions.WRITE_TASK_MAX_SIZE, 200.0008); // 839 bytes buffer size
|
||||
funcWrapper = new StreamWriteFunctionWrapper<>(tempFile.getAbsolutePath(), conf);
|
||||
|
||||
// open the function and ingest data
|
||||
funcWrapper.openFunction();
|
||||
// each record is 208 bytes. so 4 records expect to trigger buffer flush:
|
||||
// record (operation: 'I') is 304 bytes and record (operation: 'U') is 352 bytes.
|
||||
// so 3 records expect to trigger a mini-batch write
|
||||
// flush the max size bucket once at a time.
|
||||
for (RowData rowData : TestData.DATA_SET_INSERT_DUPLICATES) {
|
||||
funcWrapper.invoke(rowData);
|
||||
@@ -625,9 +623,9 @@ public class TestWriteCopyOnWrite {
|
||||
|
||||
Map<String, List<HoodieRecord>> dataBuffer = funcWrapper.getDataBuffer();
|
||||
assertThat("Should have 1 data bucket", dataBuffer.size(), is(1));
|
||||
assertThat("2 records expect to flush out as a mini-batch",
|
||||
assertThat("3 records expect to flush out as a mini-batch",
|
||||
dataBuffer.values().stream().findFirst().map(List::size).orElse(-1),
|
||||
is(2));
|
||||
is(3));
|
||||
|
||||
// this triggers the data write and event send
|
||||
funcWrapper.checkpointFunction(1);
|
||||
@@ -676,8 +674,17 @@ public class TestWriteCopyOnWrite {
|
||||
// the last 2 lines are merged
|
||||
expected.put("par1", "["
|
||||
+ "id1,par1,id1,Danny,23,1,par1, "
|
||||
+ "id1,par1,id1,Danny,23,1,par1, "
|
||||
+ "id1,par1,id1,Danny,23,1,par1]");
|
||||
+ "id1,par1,id1,Danny,23,1,par1" + "]");
|
||||
return expected;
|
||||
}
|
||||
|
||||
protected Map<String, String> getUpsertWithDeleteExpected() {
|
||||
Map<String, String> expected = new HashMap<>();
|
||||
// id3, id5 were deleted and id9 is ignored
|
||||
expected.put("par1", "[id1,par1,id1,Danny,24,1,par1, id2,par1,id2,Stephen,34,2,par1]");
|
||||
expected.put("par2", "[id4,par2,id4,Fabian,31,4,par2]");
|
||||
expected.put("par3", "[id6,par3,id6,Emma,20,6,par3]");
|
||||
expected.put("par4", "[id7,par4,id7,Bob,44,7,par4, id8,par4,id8,Han,56,8,par4]");
|
||||
return expected;
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ public class TestRowDataKeyGen {
|
||||
assertThat(keyGen1.getPartitionPath(rowData1), is("par1"));
|
||||
|
||||
// null record key and partition path
|
||||
final RowData rowData2 = insertRow(null, StringData.fromString("Danny"), 23,
|
||||
final RowData rowData2 = insertRow(TestConfigurations.ROW_TYPE, null, StringData.fromString("Danny"), 23,
|
||||
TimestampData.fromEpochMillis(1), null);
|
||||
assertThrows(HoodieKeyException.class, () -> keyGen1.getRecordKey(rowData2));
|
||||
assertThat(keyGen1.getPartitionPath(rowData2), is("default"));
|
||||
@@ -77,7 +77,7 @@ public class TestRowDataKeyGen {
|
||||
assertThat(keyGen1.getPartitionPath(rowData1), is("par1/1970-01-01T00:00:00.001"));
|
||||
|
||||
// null record key and partition path
|
||||
final RowData rowData2 = insertRow(null, null, 23, null, null);
|
||||
final RowData rowData2 = insertRow(TestConfigurations.ROW_TYPE,null, null, 23, null, null);
|
||||
assertThrows(HoodieKeyException.class, () -> keyGen1.getRecordKey(rowData2));
|
||||
assertThat(keyGen1.getPartitionPath(rowData2), is("default/default"));
|
||||
// empty record key and partition path
|
||||
|
||||
@@ -261,6 +261,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
Configuration conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
|
||||
conf.setString(FlinkOptions.TABLE_NAME, "t1");
|
||||
conf.setString(FlinkOptions.TABLE_TYPE, FlinkOptions.TABLE_TYPE_MERGE_ON_READ);
|
||||
conf.setBoolean(FlinkOptions.CHANGELOG_ENABLED, true);
|
||||
|
||||
// write one commit
|
||||
TestData.writeData(TestData.DATA_SET_INSERT, conf);
|
||||
@@ -276,17 +277,20 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
options.put(FlinkOptions.READ_AS_STREAMING.key(), "true");
|
||||
options.put(FlinkOptions.READ_STREAMING_CHECK_INTERVAL.key(), "2");
|
||||
options.put(FlinkOptions.READ_STREAMING_START_COMMIT.key(), latestCommit);
|
||||
options.put(FlinkOptions.CHANGELOG_ENABLED.key(), "true");
|
||||
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options);
|
||||
streamTableEnv.executeSql(hoodieTableDDL);
|
||||
|
||||
List<Row> result = execSelectSql(streamTableEnv, "select * from t1", 10);
|
||||
final String expected = "["
|
||||
+ "id1,Danny,24,1970-01-01T00:00:00.001,par1, "
|
||||
+ "id2,Stephen,34,1970-01-01T00:00:00.002,par1, "
|
||||
+ "id3,null,null,null,null, "
|
||||
+ "id5,null,null,null,null, "
|
||||
+ "id9,null,null,null,null]";
|
||||
assertRowsEquals(result, expected);
|
||||
final String sinkDDL = "create table sink(\n"
|
||||
+ " name varchar(20),\n"
|
||||
+ " age_sum int\n"
|
||||
+ ") with (\n"
|
||||
+ " 'connector' = '" + CollectSinkTableFactory.FACTORY_ID + "'"
|
||||
+ ")";
|
||||
List<Row> result = execSelectSql(streamTableEnv,
|
||||
"select name, sum(age) from t1 group by name", sinkDDL, 10);
|
||||
final String expected = "[+I(Danny,24), +I(Stephen,34)]";
|
||||
assertRowsEquals(result, expected, true);
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@@ -724,6 +728,11 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
} else {
|
||||
sinkDDL = TestConfigurations.getCollectSinkDDL("sink");
|
||||
}
|
||||
return execSelectSql(tEnv, select, sinkDDL, timeout);
|
||||
}
|
||||
|
||||
private List<Row> execSelectSql(TableEnvironment tEnv, String select, String sinkDDL, long timeout)
|
||||
throws InterruptedException {
|
||||
tEnv.executeSql(sinkDDL);
|
||||
TableResult tableResult = tEnv.executeSql("insert into sink " + select);
|
||||
// wait for the timeout then cancels the job
|
||||
@@ -731,7 +740,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
tableResult.getJobClient().ifPresent(JobClient::cancel);
|
||||
tEnv.executeSql("DROP TABLE IF EXISTS sink");
|
||||
return CollectSinkTableFactory.RESULT.values().stream()
|
||||
.flatMap(Collection::stream)
|
||||
.collect(Collectors.toList());
|
||||
.flatMap(Collection::stream)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ package org.apache.hudi.table.format;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.table.HoodieTableSource;
|
||||
import org.apache.hudi.table.format.cow.CopyOnWriteInputFormat;
|
||||
import org.apache.hudi.table.format.mor.MergeOnReadInputFormat;
|
||||
import org.apache.hudi.util.StreamerUtil;
|
||||
import org.apache.hudi.utils.TestConfigurations;
|
||||
@@ -60,9 +61,14 @@ public class TestInputFormat {
|
||||
File tempFile;
|
||||
|
||||
void beforeEach(HoodieTableType tableType) throws IOException {
|
||||
beforeEach(tableType, Collections.emptyMap());
|
||||
}
|
||||
|
||||
void beforeEach(HoodieTableType tableType, Map<String, String> options) throws IOException {
|
||||
conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
|
||||
conf.setString(FlinkOptions.TABLE_TYPE, tableType.name());
|
||||
conf.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, false); // close the async compaction
|
||||
options.forEach((key, value) -> conf.setString(key, value));
|
||||
|
||||
StreamerUtil.initTableIfNotExists(conf);
|
||||
this.tableSource = new HoodieTableSource(
|
||||
@@ -163,8 +169,62 @@ public class TestInputFormat {
|
||||
}
|
||||
|
||||
@Test
|
||||
void testReadWithDeletes() throws Exception {
|
||||
beforeEach(HoodieTableType.MERGE_ON_READ);
|
||||
void testReadBaseAndLogFilesWithDeletes() throws Exception {
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.CHANGELOG_ENABLED.key(), "true");
|
||||
beforeEach(HoodieTableType.MERGE_ON_READ, options);
|
||||
|
||||
// write base first with compaction.
|
||||
conf.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, true);
|
||||
conf.setInteger(FlinkOptions.COMPACTION_DELTA_COMMITS, 1);
|
||||
TestData.writeData(TestData.DATA_SET_INSERT, conf);
|
||||
|
||||
// write another commit using logs and read again.
|
||||
conf.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, false);
|
||||
TestData.writeData(TestData.DATA_SET_UPDATE_DELETE, conf);
|
||||
|
||||
InputFormat<RowData, ?> inputFormat = this.tableSource.getInputFormat();
|
||||
assertThat(inputFormat, instanceOf(MergeOnReadInputFormat.class));
|
||||
|
||||
// when isEmitDelete is false.
|
||||
List<RowData> result1 = readData(inputFormat);
|
||||
|
||||
final String actual1 = TestData.rowDataToString(result1, true);
|
||||
final String expected1 = "["
|
||||
+ "+I(id1,Danny,24,1970-01-01T00:00:00.001,par1), "
|
||||
+ "+I(id2,Stephen,34,1970-01-01T00:00:00.002,par1), "
|
||||
+ "+I(id4,Fabian,31,1970-01-01T00:00:00.004,par2), "
|
||||
+ "+I(id6,Emma,20,1970-01-01T00:00:00.006,par3), "
|
||||
+ "+I(id7,Bob,44,1970-01-01T00:00:00.007,par4), "
|
||||
+ "+I(id8,Han,56,1970-01-01T00:00:00.008,par4)]";
|
||||
assertThat(actual1, is(expected1));
|
||||
|
||||
// refresh the input format and set isEmitDelete to true.
|
||||
this.tableSource.reset();
|
||||
inputFormat = this.tableSource.getInputFormat();
|
||||
((MergeOnReadInputFormat) inputFormat).isEmitDelete(true);
|
||||
|
||||
List<RowData> result2 = readData(inputFormat);
|
||||
|
||||
final String actual2 = TestData.rowDataToString(result2, true);
|
||||
final String expected2 = "["
|
||||
+ "+I(id1,Danny,24,1970-01-01T00:00:00.001,par1), "
|
||||
+ "+I(id2,Stephen,34,1970-01-01T00:00:00.002,par1), "
|
||||
+ "-D(id3,Julian,53,1970-01-01T00:00:00.003,par2), "
|
||||
+ "+I(id4,Fabian,31,1970-01-01T00:00:00.004,par2), "
|
||||
+ "-D(id5,Sophia,18,1970-01-01T00:00:00.005,par3), "
|
||||
+ "+I(id6,Emma,20,1970-01-01T00:00:00.006,par3), "
|
||||
+ "+I(id7,Bob,44,1970-01-01T00:00:00.007,par4), "
|
||||
+ "+I(id8,Han,56,1970-01-01T00:00:00.008,par4), "
|
||||
+ "-D(id9,Jane,19,1970-01-01T00:00:00.006,par3)]";
|
||||
assertThat(actual2, is(expected2));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testReadWithDeletesMOR() throws Exception {
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.CHANGELOG_ENABLED.key(), "true");
|
||||
beforeEach(HoodieTableType.MERGE_ON_READ, options);
|
||||
|
||||
// write another commit to read again
|
||||
TestData.writeData(TestData.DATA_SET_UPDATE_DELETE, conf);
|
||||
@@ -175,13 +235,32 @@ public class TestInputFormat {
|
||||
|
||||
List<RowData> result = readData(inputFormat);
|
||||
|
||||
final String actual = TestData.rowDataToString(result);
|
||||
final String actual = TestData.rowDataToString(result, true);
|
||||
final String expected = "["
|
||||
+ "id1,Danny,24,1970-01-01T00:00:00.001,par1, "
|
||||
+ "id2,Stephen,34,1970-01-01T00:00:00.002,par1, "
|
||||
+ "id3,null,null,null,null, "
|
||||
+ "id5,null,null,null,null, "
|
||||
+ "id9,null,null,null,null]";
|
||||
+ "+I(id1,Danny,24,1970-01-01T00:00:00.001,par1), "
|
||||
+ "+I(id2,Stephen,34,1970-01-01T00:00:00.002,par1), "
|
||||
+ "-D(id3,Julian,53,1970-01-01T00:00:00.003,par2), "
|
||||
+ "-D(id5,Sophia,18,1970-01-01T00:00:00.005,par3), "
|
||||
+ "-D(id9,Jane,19,1970-01-01T00:00:00.006,par3)]";
|
||||
assertThat(actual, is(expected));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testReadWithDeletesCOW() throws Exception {
|
||||
beforeEach(HoodieTableType.COPY_ON_WRITE);
|
||||
|
||||
// write another commit to read again
|
||||
TestData.writeData(TestData.DATA_SET_UPDATE_DELETE, conf);
|
||||
|
||||
InputFormat<RowData, ?> inputFormat = this.tableSource.getInputFormat();
|
||||
assertThat(inputFormat, instanceOf(CopyOnWriteInputFormat.class));
|
||||
|
||||
List<RowData> result = readData(inputFormat);
|
||||
|
||||
final String actual = TestData.rowDataToString(result, true);
|
||||
final String expected = "["
|
||||
+ "+I(id1,Danny,24,1970-01-01T00:00:00.001,par1), "
|
||||
+ "+I(id2,Stephen,34,1970-01-01T00:00:00.002,par1)]";
|
||||
assertThat(actual, is(expected));
|
||||
}
|
||||
|
||||
@@ -205,6 +284,33 @@ public class TestInputFormat {
|
||||
assertThat(actual, is(expected));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testReadChangesUnMergedMOR() throws Exception {
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.CHANGELOG_ENABLED.key(), "true");
|
||||
beforeEach(HoodieTableType.MERGE_ON_READ, options);
|
||||
|
||||
// write another commit to read again
|
||||
TestData.writeData(TestData.DATA_SET_INSERT_UPDATE_DELETE, conf);
|
||||
|
||||
InputFormat<RowData, ?> inputFormat = this.tableSource.getInputFormat();
|
||||
assertThat(inputFormat, instanceOf(MergeOnReadInputFormat.class));
|
||||
|
||||
List<RowData> result = readData(inputFormat);
|
||||
|
||||
final String actual = TestData.rowDataToString(result, true);
|
||||
final String expected = "["
|
||||
+ "+I(id1,Danny,19,1970-01-01T00:00:00.001,par1), "
|
||||
+ "-U(id1,Danny,19,1970-01-01T00:00:00.001,par1), "
|
||||
+ "+U(id1,Danny,20,1970-01-01T00:00:00.002,par1), "
|
||||
+ "-U(id1,Danny,20,1970-01-01T00:00:00.002,par1), "
|
||||
+ "+U(id1,Danny,21,1970-01-01T00:00:00.003,par1), "
|
||||
+ "-U(id1,Danny,21,1970-01-01T00:00:00.003,par1), "
|
||||
+ "+U(id1,Danny,22,1970-01-01T00:00:00.004,par1), "
|
||||
+ "-D(id1,Danny,22,1970-01-01T00:00:00.005,par1)]";
|
||||
assertThat(actual, is(expected));
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Utilities
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@@ -234,15 +234,53 @@ public class TestData {
|
||||
TimestampData.fromEpochMillis(6), StringData.fromString("par3"))
|
||||
);
|
||||
|
||||
public static List<RowData> DATA_SET_INSERT_UPDATE_DELETE = Arrays.asList(
|
||||
// INSERT
|
||||
insertRow(StringData.fromString("id1"), StringData.fromString("Danny"), 19,
|
||||
TimestampData.fromEpochMillis(1), StringData.fromString("par1")),
|
||||
// UPDATE
|
||||
updateBeforeRow(StringData.fromString("id1"), StringData.fromString("Danny"), 19,
|
||||
TimestampData.fromEpochMillis(1), StringData.fromString("par1")),
|
||||
updateAfterRow(StringData.fromString("id1"), StringData.fromString("Danny"), 20,
|
||||
TimestampData.fromEpochMillis(2), StringData.fromString("par1")),
|
||||
updateBeforeRow(StringData.fromString("id1"), StringData.fromString("Danny"), 20,
|
||||
TimestampData.fromEpochMillis(2), StringData.fromString("par1")),
|
||||
updateAfterRow(StringData.fromString("id1"), StringData.fromString("Danny"), 21,
|
||||
TimestampData.fromEpochMillis(3), StringData.fromString("par1")),
|
||||
updateBeforeRow(StringData.fromString("id1"), StringData.fromString("Danny"), 21,
|
||||
TimestampData.fromEpochMillis(3), StringData.fromString("par1")),
|
||||
updateAfterRow(StringData.fromString("id1"), StringData.fromString("Danny"), 22,
|
||||
TimestampData.fromEpochMillis(4), StringData.fromString("par1")),
|
||||
// DELETE
|
||||
deleteRow(StringData.fromString("id1"), StringData.fromString("Danny"), 22,
|
||||
TimestampData.fromEpochMillis(5), StringData.fromString("par1"))
|
||||
);
|
||||
|
||||
/**
|
||||
* Returns string format of a list of RowData.
|
||||
*/
|
||||
public static String rowDataToString(List<RowData> rows) {
|
||||
return rowDataToString(rows, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns string format of a list of RowData.
|
||||
*
|
||||
* @param withChangeFlag whether to print the change flag
|
||||
*/
|
||||
public static String rowDataToString(List<RowData> rows, boolean withChangeFlag) {
|
||||
DataStructureConverter<Object, Object> converter =
|
||||
DataStructureConverters.getConverter(TestConfigurations.ROW_DATA_TYPE);
|
||||
return rows.stream()
|
||||
.map(row -> converter.toExternal(row).toString())
|
||||
.sorted(Comparator.naturalOrder())
|
||||
.sorted(Comparator.comparing(o -> toStringSafely(o.getString(0))))
|
||||
.map(row -> {
|
||||
final String rowStr = converter.toExternal(row).toString();
|
||||
if (withChangeFlag) {
|
||||
return row.getRowKind().shortString() + "(" + rowStr + ")";
|
||||
} else {
|
||||
return rowStr;
|
||||
}
|
||||
})
|
||||
.collect(Collectors.toList()).toString();
|
||||
}
|
||||
|
||||
@@ -287,7 +325,30 @@ public class TestData {
|
||||
* @param expected Expected string of the sorted rows
|
||||
*/
|
||||
public static void assertRowsEquals(List<Row> rows, String expected) {
|
||||
assertRowsEquals(rows, expected, 0);
|
||||
assertRowsEquals(rows, expected, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sort the {@code rows} using field at index 0 and asserts
|
||||
* it equals with the expected string {@code expected}.
|
||||
*
|
||||
* @param rows Actual result rows
|
||||
* @param expected Expected string of the sorted rows
|
||||
* @param withChangeFlag Whether compares with change flags
|
||||
*/
|
||||
public static void assertRowsEquals(List<Row> rows, String expected, boolean withChangeFlag) {
|
||||
String rowsString = rows.stream()
|
||||
.sorted(Comparator.comparing(o -> toStringSafely(o.getField(0))))
|
||||
.map(row -> {
|
||||
final String rowStr = row.toString();
|
||||
if (withChangeFlag) {
|
||||
return row.getKind().shortString() + "(" + rowStr + ")";
|
||||
} else {
|
||||
return rowStr;
|
||||
}
|
||||
})
|
||||
.collect(Collectors.toList()).toString();
|
||||
assertThat(rowsString, is(expected));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -573,7 +634,11 @@ public class TestData {
|
||||
}
|
||||
|
||||
public static BinaryRowData insertRow(Object... fields) {
|
||||
LogicalType[] types = TestConfigurations.ROW_TYPE.getFields().stream().map(RowType.RowField::getType)
|
||||
return insertRow(TestConfigurations.ROW_TYPE, fields);
|
||||
}
|
||||
|
||||
public static BinaryRowData insertRow(RowType rowType, Object... fields) {
|
||||
LogicalType[] types = rowType.getFields().stream().map(RowType.RowField::getType)
|
||||
.toArray(LogicalType[]::new);
|
||||
assertEquals(
|
||||
"Filed count inconsistent with type information",
|
||||
@@ -599,4 +664,16 @@ public class TestData {
|
||||
rowData.setRowKind(RowKind.DELETE);
|
||||
return rowData;
|
||||
}
|
||||
|
||||
private static BinaryRowData updateBeforeRow(Object... fields) {
|
||||
BinaryRowData rowData = insertRow(fields);
|
||||
rowData.setRowKind(RowKind.UPDATE_BEFORE);
|
||||
return rowData;
|
||||
}
|
||||
|
||||
private static BinaryRowData updateAfterRow(Object... fields) {
|
||||
BinaryRowData rowData = insertRow(fields);
|
||||
rowData.setRowKind(RowKind.UPDATE_AFTER);
|
||||
return rowData;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.utils;
|
||||
|
||||
import org.apache.hudi.client.model.HoodieRowData;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
|
||||
import org.apache.flink.table.api.DataTypes;
|
||||
import org.apache.flink.table.data.DecimalData;
|
||||
import org.apache.flink.table.data.RowData;
|
||||
import org.apache.flink.table.data.StringData;
|
||||
import org.apache.flink.table.types.DataType;
|
||||
import org.apache.flink.table.types.logical.RowType;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.Random;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
/**
|
||||
* Unit tests {@link HoodieRowData}.
|
||||
*/
|
||||
public class TestHoodieRowData {
|
||||
private final int metaColumnsNum = HoodieRecord.HOODIE_META_COLUMNS_WITH_OPERATION.size();
|
||||
private static final Random RANDOM = new Random();
|
||||
private static final int INTEGER_INDEX = 0;
|
||||
private static final int STRING_INDEX = 1;
|
||||
private static final int BOOLEAN_INDEX = 2;
|
||||
private static final int SHORT_INDEX = 3;
|
||||
private static final int BYTE_INDEX = 4;
|
||||
private static final int LONG_INDEX = 5;
|
||||
private static final int FLOAT_INDEX = 6;
|
||||
private static final int DOUBLE_INDEX = 7;
|
||||
private static final int DECIMAL_INDEX = 8;
|
||||
private static final int BINARY_INDEX = 9;
|
||||
private static final int ROW_INDEX = 10;
|
||||
|
||||
private static final DataType BASIC_DATA_TYPE = DataTypes.ROW(
|
||||
DataTypes.FIELD("integer", DataTypes.INT()),
|
||||
DataTypes.FIELD("string", DataTypes.STRING()),
|
||||
DataTypes.FIELD("boolean", DataTypes.BOOLEAN()),
|
||||
DataTypes.FIELD("short", DataTypes.SMALLINT()),
|
||||
DataTypes.FIELD("byte", DataTypes.TINYINT()),
|
||||
DataTypes.FIELD("long", DataTypes.BIGINT()),
|
||||
DataTypes.FIELD("float", DataTypes.FLOAT()),
|
||||
DataTypes.FIELD("double", DataTypes.DOUBLE()),
|
||||
DataTypes.FIELD("decimal", DataTypes.DECIMAL(10, 4)),
|
||||
DataTypes.FIELD("binary", DataTypes.BYTES()),
|
||||
DataTypes.FIELD("row", DataTypes.ROW()))
|
||||
.notNull();
|
||||
private static final RowType ROW_TYPE = (RowType) BASIC_DATA_TYPE.getLogicalType();
|
||||
|
||||
@Test
|
||||
public void testGet() {
|
||||
Object[] values = getRandomValue(true);
|
||||
RowData rowData = TestData.insertRow(ROW_TYPE, values);
|
||||
|
||||
HoodieRowData hoodieRowData = new HoodieRowData("commitTime", "commitSeqNo", "recordKey", "partitionPath", "fileName",
|
||||
rowData, true);
|
||||
assertValues(hoodieRowData, "commitTime", "commitSeqNo", "recordKey", "partitionPath",
|
||||
"fileName", values);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches a random Object[] of values for testing.
|
||||
*
|
||||
* @param haveRowType true if rowType need to be added as one of the elements in the Object[]
|
||||
* @return the random Object[] thus generated
|
||||
*/
|
||||
private Object[] getRandomValue(boolean haveRowType) {
|
||||
Object[] values = new Object[11];
|
||||
values[INTEGER_INDEX] = RANDOM.nextInt();
|
||||
values[STRING_INDEX] = StringData.fromString(UUID.randomUUID().toString());
|
||||
values[BOOLEAN_INDEX] = RANDOM.nextBoolean();
|
||||
values[SHORT_INDEX] = (short) RANDOM.nextInt(2);
|
||||
byte[] bytes = new byte[1];
|
||||
RANDOM.nextBytes(bytes);
|
||||
values[BYTE_INDEX] = bytes[0];
|
||||
values[LONG_INDEX] = RANDOM.nextLong();
|
||||
values[FLOAT_INDEX] = RANDOM.nextFloat();
|
||||
values[DOUBLE_INDEX] = RANDOM.nextDouble();
|
||||
values[DECIMAL_INDEX] = DecimalData.fromBigDecimal(new BigDecimal("1005.12313"), 10, 4);
|
||||
bytes = new byte[20];
|
||||
RANDOM.nextBytes(bytes);
|
||||
values[BINARY_INDEX] = bytes;
|
||||
if (haveRowType) {
|
||||
Object[] rowField = getRandomValue(false);
|
||||
values[ROW_INDEX] = TestData.insertRow(ROW_TYPE, rowField);
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
private void assertValues(HoodieRowData hoodieRowData, String commitTime, String commitSeqNo, String recordKey, String partitionPath,
|
||||
String filename, Object[] values) {
|
||||
assertEquals(commitTime, hoodieRowData.getString(0).toString());
|
||||
assertEquals(commitSeqNo, hoodieRowData.getString(1).toString());
|
||||
assertEquals(recordKey, hoodieRowData.getString(2).toString());
|
||||
assertEquals(partitionPath, hoodieRowData.getString(3).toString());
|
||||
assertEquals(filename, hoodieRowData.getString(4).toString());
|
||||
assertEquals("I", hoodieRowData.getString(5).toString());
|
||||
// row data.
|
||||
assertEquals(values[INTEGER_INDEX], hoodieRowData.getInt(INTEGER_INDEX + metaColumnsNum));
|
||||
assertEquals(values[STRING_INDEX], hoodieRowData.getString(STRING_INDEX + metaColumnsNum));
|
||||
assertEquals(values[BOOLEAN_INDEX], hoodieRowData.getBoolean(BOOLEAN_INDEX + metaColumnsNum));
|
||||
assertEquals(values[SHORT_INDEX], hoodieRowData.getShort(SHORT_INDEX + metaColumnsNum));
|
||||
assertEquals(values[BYTE_INDEX], hoodieRowData.getByte(BYTE_INDEX + metaColumnsNum));
|
||||
assertEquals(values[LONG_INDEX], hoodieRowData.getLong(LONG_INDEX + metaColumnsNum));
|
||||
assertEquals(values[FLOAT_INDEX], hoodieRowData.getFloat(FLOAT_INDEX + metaColumnsNum));
|
||||
assertEquals(values[DOUBLE_INDEX], hoodieRowData.getDouble(DOUBLE_INDEX + metaColumnsNum));
|
||||
assertEquals(values[DECIMAL_INDEX], hoodieRowData.getDecimal(DECIMAL_INDEX + metaColumnsNum, 10, 4));
|
||||
byte[] exceptBinary = (byte[]) values[BINARY_INDEX];
|
||||
byte[] binary = hoodieRowData.getBinary(BINARY_INDEX + metaColumnsNum);
|
||||
for (int i = 0; i < exceptBinary.length; i++) {
|
||||
assertEquals(exceptBinary[i], binary[i]);
|
||||
}
|
||||
assertEquals(values[ROW_INDEX], hoodieRowData.getRow(ROW_INDEX + metaColumnsNum, values.length));
|
||||
}
|
||||
}
|
||||
@@ -150,6 +150,7 @@ public class CollectSinkTableFactory implements DynamicTableSinkFactory {
|
||||
public void invoke(RowData value, SinkFunction.Context context) {
|
||||
Row row = (Row) converter.toExternal(value);
|
||||
assert row != null;
|
||||
row.setKind(value.getRowKind());
|
||||
RESULT.get(taskID).add(row);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user