
[HUDI-2551] Support DefaultHoodieRecordPayload for flink (#3792)

commit f897e6d73e
parent abf3e3fe71
Author: Danny Chan
Date:   2021-10-14 13:46:53 +08:00
Committed by: GitHub
6 changed files with 48 additions and 2 deletions
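
This change lets the Flink writer honor DefaultHoodieRecordPayload: the write config now carries the payload ordering/event-time field (StreamerUtil), and the insert path forwards that config into the record transform (FlinkLazyInsertIterable). With this payload, an update only replaces the stored record when its ordering (precombine) value is not smaller than the stored one, instead of always keeping the latest arriving write. The sketch below is a minimal illustration of that merge rule, not the actual Hudi implementation (which operates on Avro records and reads the ordering field name from the payload properties); the column name "ts" is only an example.

// Minimal sketch of the ordering-based merge rule, assuming the ordering column is "ts".
// Not the real DefaultHoodieRecordPayload code, which works on Avro GenericRecords.
import java.util.HashMap;
import java.util.Map;

public class OrderingMergeSketch {

  /** Keeps the incoming row only if its ordering value is at least the stored one. */
  @SuppressWarnings("unchecked")
  static Map<String, Object> merge(Map<String, Object> stored, Map<String, Object> incoming) {
    Comparable<Object> storedTs = (Comparable<Object>) stored.get("ts");
    return storedTs.compareTo(incoming.get("ts")) <= 0 ? incoming : stored;
  }

  public static void main(String[] args) {
    Map<String, Object> first = new HashMap<>();
    first.put("id", 1);
    first.put("ts", 20L);
    Map<String, Object> late = new HashMap<>();
    late.put("id", 1);
    late.put("ts", 1L);
    // The late-arriving row with ts=1 does not overwrite the row with ts=20.
    System.out.println(merge(first, late).get("ts")); // prints 20
  }
}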


@@ -65,7 +65,7 @@ public class FlinkLazyInsertIterable<T extends HoodieRecordPayload> extends Hood
     try {
       final Schema schema = new Schema.Parser().parse(hoodieConfig.getSchema());
       bufferedIteratorExecutor =
-          new BoundedInMemoryExecutor<>(hoodieConfig.getWriteBufferLimitBytes(), new IteratorBasedQueueProducer<>(inputItr), Option.of(getInsertHandler()), getTransformFunction(schema));
+          new BoundedInMemoryExecutor<>(hoodieConfig.getWriteBufferLimitBytes(), new IteratorBasedQueueProducer<>(inputItr), Option.of(getInsertHandler()), getTransformFunction(schema, hoodieConfig));
       final List<WriteStatus> result = bufferedIteratorExecutor.execute();
       assert result != null && !result.isEmpty() && !bufferedIteratorExecutor.isRemaining();
       return result;
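
The only change in FlinkLazyInsertIterable is that getTransformFunction now receives the write config alongside the schema, so the payload properties configured in StreamerUtil below (notably the ordering field) are available to the transform that prepares the buffered records for the insert handler.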


@@ -100,7 +100,7 @@ public class FlinkOptions extends HoodieConfig {
   public static final ConfigOption<Integer> METADATA_COMPACTION_DELTA_COMMITS = ConfigOptions
       .key("metadata.compaction.delta_commits")
       .intType()
-      .defaultValue(24)
+      .defaultValue(10)
       .withDescription("Max delta commits for metadata table to trigger compaction, default 24");
 
   // ------------------------------------------------------------------------
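
This hunk lowers the default of metadata.compaction.delta_commits from 24 to 10 delta commits; the unchanged description string still reads "default 24", so don't let it mislead you, and set the option explicitly if you rely on the previous cadence.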


@@ -129,6 +129,13 @@ public class BootstrapOperator<I, O extends HoodieRecord>
         WriteOperationType.fromValue(conf.getString(FlinkOptions.OPERATION)),
         HoodieTableType.valueOf(conf.getString(FlinkOptions.TABLE_TYPE)));
 
+    preLoadIndexRecords();
+  }
+
+  /**
+   * Load the index records before {@link #processElement}.
+   */
+  protected void preLoadIndexRecords() throws Exception {
     String basePath = hoodieTable.getMetaClient().getBasePath();
     int taskID = getRuntimeContext().getIndexOfThisSubtask();
     LOG.info("Start loading records in table {} into the index state, taskId = {}", basePath, taskID);


@@ -56,6 +56,11 @@ public class BatchBootstrapOperator<I, O extends HoodieRecord>
     this.haveSuccessfulCommits = StreamerUtil.haveSuccessfulCommits(hoodieTable.getMetaClient());
   }
 
+  @Override
+  protected void preLoadIndexRecords() {
+    // no operation
+  }
+
   @Override
   @SuppressWarnings("unchecked")
   public void processElement(StreamRecord<I> element) throws Exception {
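
BootstrapOperator.open() now delegates the eager index loading to the new protected preLoadIndexRecords() hook, and BatchBootstrapOperator overrides it as a no-op: in batch bootstrap mode the index records are loaded per partition on demand inside processElement, so there is nothing to preload up front.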


@@ -37,6 +37,7 @@ import org.apache.hudi.common.util.ReflectionUtils;
 import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieMemoryConfig;
+import org.apache.hudi.config.HoodiePayloadConfig;
 import org.apache.hudi.config.HoodieStorageConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.configuration.FlinkOptions;
@@ -189,6 +190,10 @@ public class StreamerUtil {
             .enable(conf.getBoolean(FlinkOptions.METADATA_ENABLED))
             .withMaxNumDeltaCommitsBeforeCompaction(conf.getInteger(FlinkOptions.METADATA_COMPACTION_DELTA_COMMITS))
             .build())
+        .withPayloadConfig(HoodiePayloadConfig.newBuilder()
+            .withPayloadOrderingField(conf.getString(FlinkOptions.PRECOMBINE_FIELD))
+            .withPayloadEventTimeField(conf.getString(FlinkOptions.PRECOMBINE_FIELD))
+            .build())
         .withEmbeddedTimelineServerReuseEnabled(true) // make write client embedded timeline service singleton
         .withAutoCommit(false)
         .withAllowOperationMetadataField(conf.getBoolean(FlinkOptions.CHANGELOG_ENABLED))
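
With the hunk above, the Flink write config carries a HoodiePayloadConfig whose ordering field and event-time field are both taken from the Flink precombine field, which is what DefaultHoodieRecordPayload consults when merging records. Below is a minimal sketch of the same wiring, assuming a precombine column named "ts" and using only the builder methods shown above; the property keys named in the comments are recalled from Hudi's payload props and worth double-checking in your version.

// Minimal sketch mirroring the StreamerUtil wiring above, assuming the precombine column is "ts".
import org.apache.hudi.config.HoodiePayloadConfig;

public class PayloadConfigSketch {

  /** Builds the payload config the same way StreamerUtil now does for the Flink writer. */
  public static HoodiePayloadConfig payloadConfigFor(String precombineField) {
    // Both settings point at the same column: the ordering field drives merge resolution
    // (hoodie.payload.ordering.field), the event-time field is stored next to it
    // (hoodie.payload.event.time.field).
    return HoodiePayloadConfig.newBuilder()
        .withPayloadOrderingField(precombineField)
        .withPayloadEventTimeField(precombineField)
        .build();
  }

  public static void main(String[] args) {
    System.out.println(payloadConfigFor("ts").getProps());
  }
}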


@@ -18,6 +18,7 @@
 
 package org.apache.hudi.table;
 
+import org.apache.hudi.common.model.DefaultHoodieRecordPayload;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.configuration.FlinkOptions;
@@ -584,6 +585,34 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
     assertRowsEquals(result, "[+I[id1, Sophia, 18, 1970-01-01T00:00:05, par1]]");
   }
 
+  @Test
+  void testUpdateWithDefaultHoodieRecordPayload() {
+    TableEnvironment tableEnv = batchTableEnv;
+    String hoodieTableDDL = sql("t1")
+        .field("id int")
+        .field("name string")
+        .field("price double")
+        .field("ts bigint")
+        .pkField("id")
+        .noPartition()
+        .option(FlinkOptions.PATH, tempFile.getAbsolutePath())
+        .option(FlinkOptions.PAYLOAD_CLASS_NAME, DefaultHoodieRecordPayload.class.getName())
+        .end();
+    tableEnv.executeSql(hoodieTableDDL);
+
+    final String insertInto1 = "insert into t1 values\n"
+        + "(1,'a1',20,20)";
+    execInsertSql(tableEnv, insertInto1);
+
+    final String insertInto4 = "insert into t1 values\n"
+        + "(1,'a1',20,1)";
+    execInsertSql(tableEnv, insertInto4);
+
+    List<Row> result = CollectionUtil.iterableToList(
+        () -> tableEnv.sqlQuery("select * from t1").execute().collect());
+    assertRowsEquals(result, "[+I[1, a1, 20.0, 20]]");
+  }
+
   @ParameterizedTest
   @MethodSource("executionModeAndTableTypeParams")
   void testWriteNonPartitionedTable(ExecMode execMode, HoodieTableType tableType) {
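
The new test writes the same key twice: first with ts=20, then with ts=1. Because DefaultHoodieRecordPayload compares the ordering field, the second, older write is discarded and the query still returns ts=20, whereas the default OverwriteWithLatestAvroPayload would let the later write win regardless of ts. Outside the test harness the equivalent setup is a plain Flink SQL DDL; the sketch below assumes the FlinkOptions.PAYLOAD_CLASS_NAME key is 'payload.class', that the default precombine field 'ts' applies, and uses a local /tmp path, all of which should be verified against your Hudi/Flink versions.

// Hedged sketch of the user-facing setup the test exercises; option key and path are assumptions.
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class DefaultPayloadDdlSketch {

  public static void main(String[] args) throws Exception {
    TableEnvironment tableEnv =
        TableEnvironment.create(EnvironmentSettings.newInstance().inBatchMode().build());
    // Mirrors the DDL the test helper generates for table t1.
    tableEnv.executeSql(
        "create table t1 (id int, name string, price double, ts bigint, primary key (id) not enforced)\n"
            + "with (\n"
            + "  'connector' = 'hudi',\n"
            + "  'path' = '/tmp/hudi_t1',\n"
            + "  'payload.class' = 'org.apache.hudi.common.model.DefaultHoodieRecordPayload'\n"
            + ")");
    tableEnv.executeSql("insert into t1 values (1, 'a1', 20, 20)").await();
    tableEnv.executeSql("insert into t1 values (1, 'a1', 20, 1)").await();
    // The second write has the smaller ordering value, so the query keeps ts = 20.
    tableEnv.executeSql("select * from t1").print();
  }
}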