[HUDI-1814] Non partitioned table for Flink writer (#2859)
This commit is contained in:
@@ -239,9 +239,9 @@ public class FlinkOptions {
|
|||||||
public static final ConfigOption<String> PARTITION_PATH_FIELD = ConfigOptions
|
public static final ConfigOption<String> PARTITION_PATH_FIELD = ConfigOptions
|
||||||
.key(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY)
|
.key(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY)
|
||||||
.stringType()
|
.stringType()
|
||||||
.defaultValue("partition-path")
|
.defaultValue("")
|
||||||
.withDescription("Partition path field. Value to be used at the `partitionPath` component of `HoodieKey`.\n"
|
.withDescription("Partition path field. Value to be used at the `partitionPath` component of `HoodieKey`.\n"
|
||||||
+ "Actual value obtained by invoking .toString()");
|
+ "Actual value obtained by invoking .toString(), default ''");
|
||||||
|
|
||||||
public static final ConfigOption<Boolean> PARTITION_PATH_URL_ENCODE = ConfigOptions
|
public static final ConfigOption<Boolean> PARTITION_PATH_URL_ENCODE = ConfigOptions
|
||||||
.key("write.partition.url_encode")
|
.key("write.partition.url_encode")
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ package org.apache.hudi.table;
|
|||||||
|
|
||||||
import org.apache.hudi.configuration.FlinkOptions;
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
import org.apache.hudi.keygen.ComplexAvroKeyGenerator;
|
import org.apache.hudi.keygen.ComplexAvroKeyGenerator;
|
||||||
|
import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator;
|
||||||
import org.apache.hudi.util.AvroSchemaConverter;
|
import org.apache.hudi.util.AvroSchemaConverter;
|
||||||
|
|
||||||
import org.apache.flink.configuration.ConfigOption;
|
import org.apache.flink.configuration.ConfigOption;
|
||||||
@@ -129,13 +130,21 @@ public class HoodieTableFactory implements DynamicTableSourceFactory, DynamicTab
|
|||||||
String recordKey = String.join(",", pkColumns);
|
String recordKey = String.join(",", pkColumns);
|
||||||
conf.setString(FlinkOptions.RECORD_KEY_FIELD, recordKey);
|
conf.setString(FlinkOptions.RECORD_KEY_FIELD, recordKey);
|
||||||
}
|
}
|
||||||
List<String> partitions = table.getPartitionKeys();
|
List<String> partitionKeys = table.getPartitionKeys();
|
||||||
if (partitions.size() > 0) {
|
if (partitionKeys.size() > 0) {
|
||||||
// the PARTITIONED BY syntax always has higher priority than option FlinkOptions#PARTITION_PATH_FIELD
|
// the PARTITIONED BY syntax always has higher priority than option FlinkOptions#PARTITION_PATH_FIELD
|
||||||
conf.setString(FlinkOptions.PARTITION_PATH_FIELD, String.join(",", partitions));
|
conf.setString(FlinkOptions.PARTITION_PATH_FIELD, String.join(",", partitionKeys));
|
||||||
}
|
}
|
||||||
// tweak the key gen class if possible
|
// tweak the key gen class if possible
|
||||||
boolean complexHoodieKey = pkColumns.size() > 1 || partitions.size() > 1;
|
final String[] partitions = conf.getString(FlinkOptions.PARTITION_PATH_FIELD).split(",");
|
||||||
|
if (partitions.length == 1 && partitions[0].equals("")) {
|
||||||
|
conf.setString(FlinkOptions.KEYGEN_CLASS, NonpartitionedAvroKeyGenerator.class.getName());
|
||||||
|
LOG.info("Table option [{}] is reset to {} because this is a non-partitioned table",
|
||||||
|
FlinkOptions.KEYGEN_CLASS.key(), NonpartitionedAvroKeyGenerator.class.getName());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
final String[] pks = conf.getString(FlinkOptions.RECORD_KEY_FIELD).split(",");
|
||||||
|
boolean complexHoodieKey = pks.length > 1 || partitions.length > 1;
|
||||||
if (complexHoodieKey && FlinkOptions.isDefaultValueDefined(conf, FlinkOptions.KEYGEN_CLASS)) {
|
if (complexHoodieKey && FlinkOptions.isDefaultValueDefined(conf, FlinkOptions.KEYGEN_CLASS)) {
|
||||||
conf.setString(FlinkOptions.KEYGEN_CLASS, ComplexAvroKeyGenerator.class.getName());
|
conf.setString(FlinkOptions.KEYGEN_CLASS, ComplexAvroKeyGenerator.class.getName());
|
||||||
LOG.info("Table option [{}] is reset to {} because record key or partition path has two or more fields",
|
LOG.info("Table option [{}] is reset to {} because record key or partition path has two or more fields",
|
||||||
|
|||||||
@@ -352,6 +352,42 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
|||||||
assertRowsEquals(result, "[id1,Sophia,18,1970-01-01T00:00:05,par1]");
|
assertRowsEquals(result, "[id1,Sophia,18,1970-01-01T00:00:05,par1]");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@EnumSource(value = ExecMode.class)
|
||||||
|
void testWriteNonPartitionedTable(ExecMode execMode) {
|
||||||
|
TableEnvironment tableEnv = execMode == ExecMode.BATCH ? batchTableEnv : streamTableEnv;
|
||||||
|
String hoodieTableDDL = "create table t1(\n"
|
||||||
|
+ " uuid varchar(20),\n"
|
||||||
|
+ " name varchar(10),\n"
|
||||||
|
+ " age int,\n"
|
||||||
|
+ " ts timestamp(3),\n"
|
||||||
|
+ " `partition` varchar(20),\n"
|
||||||
|
+ " PRIMARY KEY(uuid) NOT ENFORCED\n"
|
||||||
|
+ ")\n"
|
||||||
|
+ "with (\n"
|
||||||
|
+ " 'connector' = 'hudi',\n"
|
||||||
|
+ " 'path' = '" + tempFile.getAbsolutePath() + "'\n"
|
||||||
|
+ ")";
|
||||||
|
tableEnv.executeSql(hoodieTableDDL);
|
||||||
|
|
||||||
|
final String insertInto1 = "insert into t1 values\n"
|
||||||
|
+ "('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1')";
|
||||||
|
|
||||||
|
execInsertSql(tableEnv, insertInto1);
|
||||||
|
|
||||||
|
final String insertInto2 = "insert into t1 values\n"
|
||||||
|
+ "('id1','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par2'),\n"
|
||||||
|
+ "('id1','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par3'),\n"
|
||||||
|
+ "('id1','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par4'),\n"
|
||||||
|
+ "('id1','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par5')";
|
||||||
|
|
||||||
|
execInsertSql(tableEnv, insertInto2);
|
||||||
|
|
||||||
|
List<Row> result = CollectionUtil.iterableToList(
|
||||||
|
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
|
||||||
|
assertRowsEquals(result, "[id1,Sophia,18,1970-01-01T00:00:05,par5]");
|
||||||
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
// Utilities
|
// Utilities
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ package org.apache.hudi.table;
|
|||||||
|
|
||||||
import org.apache.hudi.configuration.FlinkOptions;
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
import org.apache.hudi.keygen.ComplexAvroKeyGenerator;
|
import org.apache.hudi.keygen.ComplexAvroKeyGenerator;
|
||||||
|
import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator;
|
||||||
import org.apache.hudi.util.StreamerUtil;
|
import org.apache.hudi.util.StreamerUtil;
|
||||||
import org.apache.hudi.utils.TestConfigurations;
|
import org.apache.hudi.utils.TestConfigurations;
|
||||||
|
|
||||||
@@ -119,6 +120,14 @@ public class TestHoodieTableFactory {
|
|||||||
final Configuration conf2 = tableSource2.getConf();
|
final Configuration conf2 = tableSource2.getConf();
|
||||||
assertThat(conf2.get(FlinkOptions.RECORD_KEY_FIELD), is("f0,f1"));
|
assertThat(conf2.get(FlinkOptions.RECORD_KEY_FIELD), is("f0,f1"));
|
||||||
assertThat(conf2.get(FlinkOptions.KEYGEN_CLASS), is(ComplexAvroKeyGenerator.class.getName()));
|
assertThat(conf2.get(FlinkOptions.KEYGEN_CLASS), is(ComplexAvroKeyGenerator.class.getName()));
|
||||||
|
|
||||||
|
// definition with complex primary keys and empty partition paths
|
||||||
|
this.conf.setString(FlinkOptions.KEYGEN_CLASS, FlinkOptions.KEYGEN_CLASS.defaultValue());
|
||||||
|
final MockContext sourceContext3 = MockContext.getInstance(this.conf, schema2, "");
|
||||||
|
final HoodieTableSource tableSource3 = (HoodieTableSource) new HoodieTableFactory().createDynamicTableSource(sourceContext3);
|
||||||
|
final Configuration conf3 = tableSource3.getConf();
|
||||||
|
assertThat(conf3.get(FlinkOptions.RECORD_KEY_FIELD), is("f0,f1"));
|
||||||
|
assertThat(conf3.get(FlinkOptions.KEYGEN_CLASS), is(NonpartitionedAvroKeyGenerator.class.getName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -167,6 +176,14 @@ public class TestHoodieTableFactory {
|
|||||||
final Configuration conf2 = tableSink2.getConf();
|
final Configuration conf2 = tableSink2.getConf();
|
||||||
assertThat(conf2.get(FlinkOptions.RECORD_KEY_FIELD), is("f0,f1"));
|
assertThat(conf2.get(FlinkOptions.RECORD_KEY_FIELD), is("f0,f1"));
|
||||||
assertThat(conf2.get(FlinkOptions.KEYGEN_CLASS), is(ComplexAvroKeyGenerator.class.getName()));
|
assertThat(conf2.get(FlinkOptions.KEYGEN_CLASS), is(ComplexAvroKeyGenerator.class.getName()));
|
||||||
|
|
||||||
|
// definition with complex primary keys and empty partition paths
|
||||||
|
this.conf.setString(FlinkOptions.KEYGEN_CLASS, FlinkOptions.KEYGEN_CLASS.defaultValue());
|
||||||
|
final MockContext sinkContext3 = MockContext.getInstance(this.conf, schema2, "");
|
||||||
|
final HoodieTableSink tableSink3 = (HoodieTableSink) new HoodieTableFactory().createDynamicTableSink(sinkContext3);
|
||||||
|
final Configuration conf3 = tableSink3.getConf();
|
||||||
|
assertThat(conf3.get(FlinkOptions.RECORD_KEY_FIELD), is("f0,f1"));
|
||||||
|
assertThat(conf3.get(FlinkOptions.KEYGEN_CLASS), is(NonpartitionedAvroKeyGenerator.class.getName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user