1
0

[HUDI-2582] Support concurrent key gen for different tables with row writer path (#3817)

Co-authored-by: yao.zhou <yao.zhou@linkflowtech.com>
This commit is contained in:
Carl-Zhou-CN
2021-11-03 06:05:09 +08:00
committed by GitHub
parent d194643b49
commit dee3a14aae
2 changed files with 38 additions and 16 deletions

View File

@@ -50,6 +50,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import scala.Tuple2;
@@ -89,9 +90,24 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
structType = AvroConversionUtils.convertAvroSchemaToStructType(schema);
}
@Test
public void testBulkInsertHelperConcurrently() {
IntStream.range(0, 2).parallel().forEach(i -> {
if (i % 2 == 0) {
testBulkInsertHelperFor("_row_key");
} else {
testBulkInsertHelperFor("ts");
}
});
}
@Test
public void testBulkInsertHelper() {
HoodieWriteConfig config = getConfigBuilder(schemaStr).withProps(getPropsAllSet()).combineInput(false, false).build();
testBulkInsertHelperFor("_row_key");
}
private void testBulkInsertHelperFor(String recordKey) {
HoodieWriteConfig config = getConfigBuilder(schemaStr).withProps(getPropsAllSet(recordKey)).combineInput(false, false).build();
List<Row> rows = DataSourceTestUtils.generateRandomRows(10);
Dataset<Row> dataset = sqlContext.createDataFrame(rows, structType);
Dataset<Row> result = HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
@@ -106,7 +122,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
}
result.toJavaRDD().foreach(entry -> {
assertTrue(entry.get(resultSchema.fieldIndex(HoodieRecord.RECORD_KEY_METADATA_FIELD)).equals(entry.getAs("_row_key")));
assertTrue(entry.get(resultSchema.fieldIndex(HoodieRecord.RECORD_KEY_METADATA_FIELD)).equals(entry.getAs(recordKey).toString()));
assertTrue(entry.get(resultSchema.fieldIndex(HoodieRecord.PARTITION_PATH_METADATA_FIELD)).equals(entry.getAs("partition")));
assertTrue(entry.get(resultSchema.fieldIndex(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD)).equals(""));
assertTrue(entry.get(resultSchema.fieldIndex(HoodieRecord.COMMIT_TIME_METADATA_FIELD)).equals(""));
@@ -148,7 +164,8 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
@ParameterizedTest
@MethodSource("providePreCombineArgs")
public void testBulkInsertPreCombine(boolean enablePreCombine) {
HoodieWriteConfig config = getConfigBuilder(schemaStr).withProps(getPropsAllSet()).combineInput(enablePreCombine, enablePreCombine)
HoodieWriteConfig config = getConfigBuilder(schemaStr).withProps(getPropsAllSet("_row_key"))
.combineInput(enablePreCombine, enablePreCombine)
.withPreCombineField("ts").build();
List<Row> inserts = DataSourceTestUtils.generateRandomRows(10);
Dataset<Row> toUpdateDataset = sqlContext.createDataFrame(inserts.subList(0, 5), structType);
@@ -207,22 +224,27 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
}
}
private Map<String, String> getPropsAllSet() {
return getProps(true, true, true, true);
private Map<String, String> getPropsAllSet(String recordKey) {
return getProps(recordKey, true, true, true, true);
}
private Map<String, String> getProps(boolean setAll, boolean setKeyGen, boolean setRecordKey, boolean setPartitionPath) {
return getProps("_row_key", setAll, setKeyGen, setRecordKey, setPartitionPath);
}
private Map<String, String> getProps(String recordKey, boolean setAll, boolean setKeyGen, boolean setRecordKey, boolean setPartitionPath) {
Map<String, String> props = new HashMap<>();
if (setAll) {
props.put(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key(), "org.apache.hudi.keygen.SimpleKeyGenerator");
props.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
props.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), recordKey);
props.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition");
props.put(HoodieWriteConfig.TBL_NAME.key(), recordKey + "_table");
} else {
if (setKeyGen) {
props.put(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key(), "org.apache.hudi.keygen.SimpleKeyGenerator");
}
if (setRecordKey) {
props.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
props.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), recordKey);
}
if (setPartitionPath) {
props.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition");