[HUDI-912] Refactor and relocate KeyGenerator to support more engines (#2200)
* [HUDI-912] Refactor and relocate KeyGenerator to support more engines * Rename KeyGenerators
This commit is contained in:
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.ApiMaturityLevel;
|
||||
import org.apache.hudi.AvroConversionHelper;
|
||||
import org.apache.hudi.PublicAPIMethod;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.exception.HoodieKeyException;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
import scala.Function1;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Base class for the built-in key generators. Contains methods structured for
|
||||
* code reuse amongst them.
|
||||
*/
|
||||
public abstract class BuiltinKeyGenerator extends BaseKeyGenerator implements SparkKeyGeneratorInterface {
|
||||
|
||||
private static final String STRUCT_NAME = "hoodieRowTopLevelField";
|
||||
private static final String NAMESPACE = "hoodieRow";
|
||||
private transient Function1<Object, Object> converterFn = null;
|
||||
protected StructType structType;
|
||||
|
||||
protected Map<String, List<Integer>> recordKeyPositions = new HashMap<>();
|
||||
protected Map<String, List<Integer>> partitionPathPositions = new HashMap<>();
|
||||
|
||||
protected BuiltinKeyGenerator(TypedProperties config) {
|
||||
super(config);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch record key from {@link Row}.
|
||||
* @param row instance of {@link Row} from which record key is requested.
|
||||
* @return the record key of interest from {@link Row}.
|
||||
*/
|
||||
@Override
|
||||
@PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
|
||||
public String getRecordKey(Row row) {
|
||||
if (null == converterFn) {
|
||||
converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE);
|
||||
}
|
||||
GenericRecord genericRecord = (GenericRecord) converterFn.apply(row);
|
||||
return getKey(genericRecord).getRecordKey();
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch partition path from {@link Row}.
|
||||
* @param row instance of {@link Row} from which partition path is requested
|
||||
* @return the partition path of interest from {@link Row}.
|
||||
*/
|
||||
@Override
|
||||
@PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
|
||||
public String getPartitionPath(Row row) {
|
||||
if (null == converterFn) {
|
||||
converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE);
|
||||
}
|
||||
GenericRecord genericRecord = (GenericRecord) converterFn.apply(row);
|
||||
return getKey(genericRecord).getPartitionPath();
|
||||
}
|
||||
|
||||
void buildFieldPositionMapIfNeeded(StructType structType) {
|
||||
if (this.structType == null) {
|
||||
// parse simple fields
|
||||
getRecordKeyFields().stream()
|
||||
.filter(f -> !(f.contains(".")))
|
||||
.forEach(f -> {
|
||||
if (structType.getFieldIndex(f).isDefined()) {
|
||||
recordKeyPositions.put(f, Collections.singletonList((Integer) (structType.getFieldIndex(f).get())));
|
||||
} else {
|
||||
throw new HoodieKeyException("recordKey value not found for field: \"" + f + "\"");
|
||||
}
|
||||
});
|
||||
// parse nested fields
|
||||
getRecordKeyFields().stream()
|
||||
.filter(f -> f.contains("."))
|
||||
.forEach(f -> recordKeyPositions.put(f, RowKeyGeneratorHelper.getNestedFieldIndices(structType, f, true)));
|
||||
// parse simple fields
|
||||
if (getPartitionPathFields() != null) {
|
||||
getPartitionPathFields().stream().filter(f -> !f.isEmpty()).filter(f -> !(f.contains(".")))
|
||||
.forEach(f -> {
|
||||
if (structType.getFieldIndex(f).isDefined()) {
|
||||
partitionPathPositions.put(f,
|
||||
Collections.singletonList((Integer) (structType.getFieldIndex(f).get())));
|
||||
} else {
|
||||
partitionPathPositions.put(f, Collections.singletonList(-1));
|
||||
}
|
||||
});
|
||||
// parse nested fields
|
||||
getPartitionPathFields().stream().filter(f -> !f.isEmpty()).filter(f -> f.contains("."))
|
||||
.forEach(f -> partitionPathPositions.put(f,
|
||||
RowKeyGeneratorHelper.getNestedFieldIndices(structType, f, false)));
|
||||
}
|
||||
this.structType = structType;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.spark.sql.Row;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
|
||||
*/
|
||||
public class ComplexKeyGenerator extends BuiltinKeyGenerator {
|
||||
|
||||
private final ComplexAvroKeyGenerator complexAvroKeyGenerator;
|
||||
|
||||
public ComplexKeyGenerator(TypedProperties props) {
|
||||
super(props);
|
||||
this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY)
|
||||
.split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
|
||||
this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY)
|
||||
.split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
|
||||
complexAvroKeyGenerator = new ComplexAvroKeyGenerator(props);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRecordKey(GenericRecord record) {
|
||||
return complexAvroKeyGenerator.getRecordKey(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(GenericRecord record) {
|
||||
return complexAvroKeyGenerator.getPartitionPath(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRecordKey(Row row) {
|
||||
buildFieldPositionMapIfNeeded(row.schema());
|
||||
return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(Row row) {
|
||||
buildFieldPositionMapIfNeeded(row.schema());
|
||||
return RowKeyGeneratorHelper.getPartitionPathFromRow(row, getPartitionPathFields(),
|
||||
hiveStylePartitioning, partitionPathPositions);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.HoodieKeyException;
|
||||
import org.apache.hudi.exception.HoodieKeyGeneratorException;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.spark.sql.Row;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* This is a generic implementation of KeyGenerator where users can configure record key as a single field or a combination of fields. Similarly partition path can be configured to have multiple
|
||||
* fields or only one field. This class expects value for prop "hoodie.datasource.write.partitionpath.field" in a specific format. For example:
|
||||
*
|
||||
* properties.put("hoodie.datasource.write.partitionpath.field", "field1:PartitionKeyType1,field2:PartitionKeyType2").
|
||||
*
|
||||
* The complete partition path is created as <value for field1 basis PartitionKeyType1>/<value for field2 basis PartitionKeyType2> and so on.
|
||||
*
|
||||
* Few points to consider: 1. If you want to customize some partition path field on a timestamp basis, you can use field1:timestampBased 2. If you simply want to have the value of your configured
|
||||
* field in the partition path, use field1:simple 3. If you want your table to be non partitioned, simply leave it as blank.
|
||||
*
|
||||
* RecordKey is internally generated using either SimpleKeyGenerator or ComplexKeyGenerator.
|
||||
*/
|
||||
public class CustomKeyGenerator extends BuiltinKeyGenerator {
|
||||
|
||||
private final CustomAvroKeyGenerator customAvroKeyGenerator;
|
||||
|
||||
public CustomKeyGenerator(TypedProperties props) {
|
||||
super(props);
|
||||
this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY).split(",")).map(String::trim).collect(Collectors.toList());
|
||||
this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY).split(",")).map(String::trim).collect(Collectors.toList());
|
||||
customAvroKeyGenerator = new CustomAvroKeyGenerator(props);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRecordKey(GenericRecord record) {
|
||||
return customAvroKeyGenerator.getRecordKey(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(GenericRecord record) {
|
||||
return customAvroKeyGenerator.getPartitionPath(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRecordKey(Row row) {
|
||||
validateRecordKeyFields();
|
||||
return getRecordKeyFields().size() == 1
|
||||
? new SimpleKeyGenerator(config).getRecordKey(row)
|
||||
: new ComplexKeyGenerator(config).getRecordKey(row);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(Row row) {
|
||||
return getPartitionPath(Option.empty(), Option.of(row));
|
||||
}
|
||||
|
||||
private String getPartitionPath(Option<GenericRecord> record, Option<Row> row) {
|
||||
if (getPartitionPathFields() == null) {
|
||||
throw new HoodieKeyException("Unable to find field names for partition path in cfg");
|
||||
}
|
||||
|
||||
String partitionPathField;
|
||||
StringBuilder partitionPath = new StringBuilder();
|
||||
|
||||
//Corresponds to no partition case
|
||||
if (getPartitionPathFields().size() == 1 && getPartitionPathFields().get(0).isEmpty()) {
|
||||
return "";
|
||||
}
|
||||
for (String field : getPartitionPathFields()) {
|
||||
String[] fieldWithType = field.split(customAvroKeyGenerator.getSplitRegex());
|
||||
if (fieldWithType.length != 2) {
|
||||
throw new HoodieKeyGeneratorException("Unable to find field names for partition path in proper format");
|
||||
}
|
||||
|
||||
partitionPathField = fieldWithType[0];
|
||||
CustomAvroKeyGenerator.PartitionKeyType keyType = CustomAvroKeyGenerator.PartitionKeyType.valueOf(fieldWithType[1].toUpperCase());
|
||||
switch (keyType) {
|
||||
case SIMPLE:
|
||||
if (record.isPresent()) {
|
||||
partitionPath.append(new SimpleKeyGenerator(config, partitionPathField).getPartitionPath(record.get()));
|
||||
} else {
|
||||
partitionPath.append(new SimpleKeyGenerator(config, partitionPathField).getPartitionPath(row.get()));
|
||||
}
|
||||
break;
|
||||
case TIMESTAMP:
|
||||
try {
|
||||
if (record.isPresent()) {
|
||||
partitionPath.append(new TimestampBasedKeyGenerator(config, partitionPathField).getPartitionPath(record.get()));
|
||||
} else {
|
||||
partitionPath.append(new TimestampBasedKeyGenerator(config, partitionPathField).getPartitionPath(row.get()));
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieKeyGeneratorException("Unable to initialise TimestampBasedKeyGenerator class");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new HoodieKeyGeneratorException("Please provide valid PartitionKeyType with fields! You provided: " + keyType);
|
||||
}
|
||||
|
||||
partitionPath.append(customAvroKeyGenerator.getDefaultPartitionPathSeparator());
|
||||
}
|
||||
partitionPath.deleteCharAt(partitionPath.length() - 1);
|
||||
return partitionPath.toString();
|
||||
}
|
||||
|
||||
private void validateRecordKeyFields() {
|
||||
if (getRecordKeyFields() == null || getRecordKeyFields().isEmpty()) {
|
||||
throw new HoodieKeyException("Unable to find field names for record key in cfg");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.spark.sql.Row;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Key generator for deletes using global indices. Global index deletes do not require partition value so this key generator
|
||||
* avoids using partition value for generating HoodieKey.
|
||||
*/
|
||||
public class GlobalDeleteKeyGenerator extends BuiltinKeyGenerator {
|
||||
|
||||
private final GlobalAvroDeleteKeyGenerator globalAvroDeleteKeyGenerator;
|
||||
public GlobalDeleteKeyGenerator(TypedProperties config) {
|
||||
super(config);
|
||||
this.recordKeyFields = Arrays.asList(config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY).split(","));
|
||||
globalAvroDeleteKeyGenerator = new GlobalAvroDeleteKeyGenerator(config);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRecordKey(GenericRecord record) {
|
||||
return globalAvroDeleteKeyGenerator.getRecordKey(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(GenericRecord record) {
|
||||
return globalAvroDeleteKeyGenerator.getPartitionPath(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getPartitionPathFields() {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRecordKey(Row row) {
|
||||
buildFieldPositionMapIfNeeded(row.schema());
|
||||
return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(Row row) {
|
||||
return globalAvroDeleteKeyGenerator.getEmptyPartition();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.spark.sql.Row;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Simple Key generator for unpartitioned Hive Tables.
|
||||
*/
|
||||
public class NonpartitionedKeyGenerator extends SimpleKeyGenerator {
|
||||
|
||||
private final NonpartitionedAvroKeyGenerator nonpartitionedAvroKeyGenerator;
|
||||
|
||||
public NonpartitionedKeyGenerator(TypedProperties config) {
|
||||
super(config);
|
||||
nonpartitionedAvroKeyGenerator = new NonpartitionedAvroKeyGenerator(config);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(GenericRecord record) {
|
||||
return nonpartitionedAvroKeyGenerator.getPartitionPath(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getPartitionPathFields() {
|
||||
return nonpartitionedAvroKeyGenerator.getPartitionPathFields();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(Row row) {
|
||||
return nonpartitionedAvroKeyGenerator.getEmptyPartition();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,200 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.hudi.exception.HoodieKeyException;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.types.StructField;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
import scala.Option;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH;
|
||||
import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH_SEPARATOR;
|
||||
import static org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER;
|
||||
import static org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER;
|
||||
|
||||
/**
|
||||
* Helper class to fetch fields from Row.
|
||||
*/
|
||||
public class RowKeyGeneratorHelper {
|
||||
|
||||
/**
|
||||
* Generates record key for the corresponding {@link Row}.
|
||||
* @param row instance of {@link Row} of interest
|
||||
* @param recordKeyFields record key fields as a list
|
||||
* @param recordKeyPositions record key positions for the corresponding record keys in {@code recordKeyFields}
|
||||
* @param prefixFieldName {@code true} if field name need to be prefixed in the returned result. {@code false} otherwise.
|
||||
* @return the record key thus generated
|
||||
*/
|
||||
public static String getRecordKeyFromRow(Row row, List<String> recordKeyFields, Map<String, List<Integer>> recordKeyPositions, boolean prefixFieldName) {
|
||||
AtomicBoolean keyIsNullOrEmpty = new AtomicBoolean(true);
|
||||
String toReturn = recordKeyFields.stream().map(field -> {
|
||||
String val = null;
|
||||
List<Integer> fieldPositions = recordKeyPositions.get(field);
|
||||
if (fieldPositions.size() == 1) { // simple field
|
||||
Integer fieldPos = fieldPositions.get(0);
|
||||
if (row.isNullAt(fieldPos)) {
|
||||
val = NULL_RECORDKEY_PLACEHOLDER;
|
||||
} else {
|
||||
val = row.getAs(field).toString();
|
||||
if (val.isEmpty()) {
|
||||
val = EMPTY_RECORDKEY_PLACEHOLDER;
|
||||
} else {
|
||||
keyIsNullOrEmpty.set(false);
|
||||
}
|
||||
}
|
||||
} else { // nested fields
|
||||
val = getNestedFieldVal(row, recordKeyPositions.get(field)).toString();
|
||||
if (!val.contains(NULL_RECORDKEY_PLACEHOLDER) && !val.contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
|
||||
keyIsNullOrEmpty.set(false);
|
||||
}
|
||||
}
|
||||
return prefixFieldName ? (field + ":" + val) : val;
|
||||
}).collect(Collectors.joining(","));
|
||||
if (keyIsNullOrEmpty.get()) {
|
||||
throw new HoodieKeyException("recordKey value: \"" + toReturn + "\" for fields: \"" + Arrays.toString(recordKeyFields.toArray()) + "\" cannot be null or empty.");
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates partition path for the corresponding {@link Row}.
|
||||
* @param row instance of {@link Row} of interest
|
||||
* @param partitionPathFields partition path fields as a list
|
||||
* @param hiveStylePartitioning {@code true} if hive style partitioning is set. {@code false} otherwise
|
||||
* @param partitionPathPositions partition path positions for the corresponding fields in {@code partitionPathFields}
|
||||
* @return the generated partition path for the row
|
||||
*/
|
||||
public static String getPartitionPathFromRow(Row row, List<String> partitionPathFields, boolean hiveStylePartitioning, Map<String, List<Integer>> partitionPathPositions) {
|
||||
return IntStream.range(0, partitionPathFields.size()).mapToObj(idx -> {
|
||||
String field = partitionPathFields.get(idx);
|
||||
String val = null;
|
||||
List<Integer> fieldPositions = partitionPathPositions.get(field);
|
||||
if (fieldPositions.size() == 1) { // simple
|
||||
Integer fieldPos = fieldPositions.get(0);
|
||||
// for partition path, if field is not found, index will be set to -1
|
||||
if (fieldPos == -1 || row.isNullAt(fieldPos)) {
|
||||
val = DEFAULT_PARTITION_PATH;
|
||||
} else {
|
||||
val = row.getAs(field).toString();
|
||||
if (val.isEmpty()) {
|
||||
val = DEFAULT_PARTITION_PATH;
|
||||
}
|
||||
}
|
||||
if (hiveStylePartitioning) {
|
||||
val = field + "=" + val;
|
||||
}
|
||||
} else { // nested
|
||||
Object nestedVal = getNestedFieldVal(row, partitionPathPositions.get(field));
|
||||
if (nestedVal.toString().contains(NULL_RECORDKEY_PLACEHOLDER) || nestedVal.toString().contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
|
||||
val = hiveStylePartitioning ? field + "=" + DEFAULT_PARTITION_PATH : DEFAULT_PARTITION_PATH;
|
||||
} else {
|
||||
val = hiveStylePartitioning ? field + "=" + nestedVal.toString() : nestedVal.toString();
|
||||
}
|
||||
}
|
||||
return val;
|
||||
}).collect(Collectors.joining(DEFAULT_PARTITION_PATH_SEPARATOR));
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch the field value located at the positions requested for.
|
||||
* @param row instance of {@link Row} of interest
|
||||
* @param positions tree style positions where the leaf node need to be fetched and returned
|
||||
* @return the field value as per the positions requested for.
|
||||
*/
|
||||
public static Object getNestedFieldVal(Row row, List<Integer> positions) {
|
||||
if (positions.size() == 1 && positions.get(0) == -1) {
|
||||
return DEFAULT_PARTITION_PATH;
|
||||
}
|
||||
int index = 0;
|
||||
int totalCount = positions.size();
|
||||
Row valueToProcess = row;
|
||||
Object toReturn = null;
|
||||
|
||||
while (index < totalCount) {
|
||||
if (index < totalCount - 1) {
|
||||
if (valueToProcess.isNullAt(positions.get(index))) {
|
||||
toReturn = NULL_RECORDKEY_PLACEHOLDER;
|
||||
break;
|
||||
}
|
||||
valueToProcess = (Row) valueToProcess.get(positions.get(index));
|
||||
} else { // last index
|
||||
if (null != valueToProcess.getAs(positions.get(index)) && valueToProcess.getAs(positions.get(index)).toString().isEmpty()) {
|
||||
toReturn = EMPTY_RECORDKEY_PLACEHOLDER;
|
||||
break;
|
||||
}
|
||||
toReturn = valueToProcess.getAs(positions.get(index));
|
||||
}
|
||||
index++;
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the tree style positions for the field requested for as per the defined struct type.
|
||||
* @param structType schema of interest
|
||||
* @param field field of interest for which the positions are requested for
|
||||
* @param isRecordKey {@code true} if the field requested for is a record key. {@code false} incase of a partition path.
|
||||
* @return the positions of the field as per the struct type.
|
||||
*/
|
||||
public static List<Integer> getNestedFieldIndices(StructType structType, String field, boolean isRecordKey) {
|
||||
String[] slices = field.split("\\.");
|
||||
List<Integer> positions = new ArrayList<>();
|
||||
int index = 0;
|
||||
int totalCount = slices.length;
|
||||
while (index < totalCount) {
|
||||
String slice = slices[index];
|
||||
Option<Object> curIndexOpt = structType.getFieldIndex(slice);
|
||||
if (curIndexOpt.isDefined()) {
|
||||
int curIndex = (int) curIndexOpt.get();
|
||||
positions.add(curIndex);
|
||||
final StructField nestedField = structType.fields()[curIndex];
|
||||
if (index < totalCount - 1) {
|
||||
if (!(nestedField.dataType() instanceof StructType)) {
|
||||
if (isRecordKey) {
|
||||
throw new HoodieKeyException("Nested field should be of type StructType " + nestedField);
|
||||
} else {
|
||||
positions = Collections.singletonList(-1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
structType = (StructType) nestedField.dataType();
|
||||
}
|
||||
} else {
|
||||
if (isRecordKey) {
|
||||
throw new HoodieKeyException("Can't find " + slice + " in StructType for the field " + field);
|
||||
} else {
|
||||
positions = Collections.singletonList(-1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
index++;
|
||||
}
|
||||
return positions;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.spark.sql.Row;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* Simple key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
|
||||
*/
|
||||
public class SimpleKeyGenerator extends BuiltinKeyGenerator {
|
||||
|
||||
private final SimpleAvroKeyGenerator simpleAvroKeyGenerator;
|
||||
|
||||
public SimpleKeyGenerator(TypedProperties props) {
|
||||
this(props, props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY),
|
||||
props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY));
|
||||
}
|
||||
|
||||
SimpleKeyGenerator(TypedProperties props, String partitionPathField) {
|
||||
this(props, null, partitionPathField);
|
||||
}
|
||||
|
||||
SimpleKeyGenerator(TypedProperties props, String recordKeyField, String partitionPathField) {
|
||||
super(props);
|
||||
this.recordKeyFields = recordKeyField == null
|
||||
? Collections.emptyList()
|
||||
: Collections.singletonList(recordKeyField);
|
||||
this.partitionPathFields = Collections.singletonList(partitionPathField);
|
||||
simpleAvroKeyGenerator = new SimpleAvroKeyGenerator(props, recordKeyField, partitionPathField);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRecordKey(GenericRecord record) {
|
||||
return simpleAvroKeyGenerator.getRecordKey(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(GenericRecord record) {
|
||||
return simpleAvroKeyGenerator.getPartitionPath(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRecordKey(Row row) {
|
||||
buildFieldPositionMapIfNeeded(row.schema());
|
||||
return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(Row row) {
|
||||
buildFieldPositionMapIfNeeded(row.schema());
|
||||
return RowKeyGeneratorHelper.getPartitionPathFromRow(row, getPartitionPathFields(),
|
||||
hiveStylePartitioning, partitionPathPositions);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.exception.HoodieKeyGeneratorException;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.spark.sql.Row;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH;
|
||||
import static org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER;
|
||||
import static org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER;
|
||||
|
||||
/**
|
||||
* Key generator, that relies on timestamps for partitioning field. Still picks record key by name.
|
||||
*/
|
||||
public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
|
||||
|
||||
private final TimestampBasedAvroKeyGenerator timestampBasedAvroKeyGenerator;
|
||||
|
||||
public TimestampBasedKeyGenerator(TypedProperties config) throws IOException {
|
||||
this(config, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY),
|
||||
config.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY));
|
||||
}
|
||||
|
||||
TimestampBasedKeyGenerator(TypedProperties config, String partitionPathField) throws IOException {
|
||||
this(config, null, partitionPathField);
|
||||
}
|
||||
|
||||
TimestampBasedKeyGenerator(TypedProperties config, String recordKeyField, String partitionPathField) throws IOException {
|
||||
super(config, recordKeyField, partitionPathField);
|
||||
timestampBasedAvroKeyGenerator = new TimestampBasedAvroKeyGenerator(config, recordKeyField, partitionPathField);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(GenericRecord record) {
|
||||
return timestampBasedAvroKeyGenerator.getPartitionPath(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRecordKey(Row row) {
|
||||
buildFieldPositionMapIfNeeded(row.schema());
|
||||
return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(Row row) {
|
||||
Object fieldVal = null;
|
||||
buildFieldPositionMapIfNeeded(row.schema());
|
||||
Object partitionPathFieldVal = RowKeyGeneratorHelper.getNestedFieldVal(row, partitionPathPositions.get(getPartitionPathFields().get(0)));
|
||||
try {
|
||||
if (partitionPathFieldVal == null || partitionPathFieldVal.toString().contains(DEFAULT_PARTITION_PATH) || partitionPathFieldVal.toString().contains(NULL_RECORDKEY_PLACEHOLDER)
|
||||
|| partitionPathFieldVal.toString().contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
|
||||
fieldVal = timestampBasedAvroKeyGenerator.getDefaultPartitionVal();
|
||||
} else {
|
||||
fieldVal = partitionPathFieldVal;
|
||||
}
|
||||
return timestampBasedAvroKeyGenerator.getPartitionPath(fieldVal);
|
||||
} catch (Exception e) {
|
||||
throw new HoodieKeyGeneratorException("Unable to parse input partition field :" + fieldVal, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,368 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi
|
||||
|
||||
import java.nio.ByteBuffer
|
||||
import java.sql.{Date, Timestamp}
|
||||
import java.util
|
||||
|
||||
import org.apache.avro.Conversions.DecimalConversion
|
||||
import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis}
|
||||
import org.apache.avro.Schema.Type._
|
||||
import org.apache.avro.generic.GenericData.{Fixed, Record}
|
||||
import org.apache.avro.generic.{GenericData, GenericFixed, GenericRecord}
|
||||
import org.apache.avro.{LogicalTypes, Schema}
|
||||
import org.apache.spark.sql.Row
|
||||
import org.apache.spark.sql.avro.{IncompatibleSchemaException, SchemaConverters}
|
||||
import org.apache.spark.sql.catalyst.expressions.GenericRow
|
||||
import org.apache.spark.sql.catalyst.util.DateTimeUtils
|
||||
import org.apache.spark.sql.types._
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
object AvroConversionHelper {
|
||||
|
||||
private def createDecimal(decimal: java.math.BigDecimal, precision: Int, scale: Int): Decimal = {
|
||||
if (precision <= Decimal.MAX_LONG_DIGITS) {
|
||||
// Constructs a `Decimal` with an unscaled `Long` value if possible.
|
||||
Decimal(decimal.unscaledValue().longValue(), precision, scale)
|
||||
} else {
|
||||
// Otherwise, resorts to an unscaled `BigInteger` instead.
|
||||
Decimal(decimal, precision, scale)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Returns a converter function to convert row in avro format to GenericRow of catalyst.
|
||||
*
|
||||
* @param sourceAvroSchema Source schema before conversion inferred from avro file by passed in
|
||||
* by user.
|
||||
* @param targetSqlType Target catalyst sql type after the conversion.
|
||||
* @return returns a converter function to convert row in avro format to GenericRow of catalyst.
|
||||
*/
|
||||
def createConverterToRow(sourceAvroSchema: Schema,
|
||||
targetSqlType: DataType): AnyRef => AnyRef = {
|
||||
|
||||
def createConverter(avroSchema: Schema, sqlType: DataType, path: List[String]): AnyRef => AnyRef = {
|
||||
val avroType = avroSchema.getType
|
||||
(sqlType, avroType) match {
|
||||
// Avro strings are in Utf8, so we have to call toString on them
|
||||
case (StringType, STRING) | (StringType, ENUM) =>
|
||||
(item: AnyRef) => if (item == null) null else item.toString
|
||||
// Byte arrays are reused by avro, so we have to make a copy of them.
|
||||
case (IntegerType, INT) | (BooleanType, BOOLEAN) | (DoubleType, DOUBLE) |
|
||||
(FloatType, FLOAT) | (LongType, LONG) =>
|
||||
identity
|
||||
case (BinaryType, FIXED) =>
|
||||
(item: AnyRef) =>
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
item.asInstanceOf[Fixed].bytes().clone()
|
||||
}
|
||||
case (BinaryType, BYTES) =>
|
||||
(item: AnyRef) =>
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
val byteBuffer = item.asInstanceOf[ByteBuffer]
|
||||
val bytes = new Array[Byte](byteBuffer.remaining)
|
||||
byteBuffer.get(bytes)
|
||||
bytes
|
||||
}
|
||||
case (d: DecimalType, FIXED) =>
|
||||
(item: AnyRef) =>
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
val decimalConversion = new DecimalConversion
|
||||
val bigDecimal = decimalConversion.fromFixed(item.asInstanceOf[GenericFixed], avroSchema,
|
||||
LogicalTypes.decimal(d.precision, d.scale))
|
||||
createDecimal(bigDecimal, d.precision, d.scale)
|
||||
}
|
||||
case (d: DecimalType, BYTES) =>
|
||||
(item: AnyRef) =>
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
val decimalConversion = new DecimalConversion
|
||||
val bigDecimal = decimalConversion.fromBytes(item.asInstanceOf[ByteBuffer], avroSchema,
|
||||
LogicalTypes.decimal(d.precision, d.scale))
|
||||
createDecimal(bigDecimal, d.precision, d.scale)
|
||||
}
|
||||
case (DateType, INT) =>
|
||||
(item: AnyRef) =>
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
item match {
|
||||
case integer: Integer => DateTimeUtils.toJavaDate(integer)
|
||||
case _ => new Date(item.asInstanceOf[Long])
|
||||
}
|
||||
}
|
||||
case (TimestampType, LONG) =>
|
||||
(item: AnyRef) =>
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
avroSchema.getLogicalType match {
|
||||
case _: TimestampMillis =>
|
||||
new Timestamp(item.asInstanceOf[Long])
|
||||
case _: TimestampMicros =>
|
||||
new Timestamp(item.asInstanceOf[Long] / 1000)
|
||||
case null =>
|
||||
new Timestamp(item.asInstanceOf[Long])
|
||||
case other =>
|
||||
throw new IncompatibleSchemaException(
|
||||
s"Cannot convert Avro logical type $other to Catalyst Timestamp type.")
|
||||
}
|
||||
}
|
||||
case (struct: StructType, RECORD) =>
|
||||
val length = struct.fields.length
|
||||
val converters = new Array[AnyRef => AnyRef](length)
|
||||
val avroFieldIndexes = new Array[Int](length)
|
||||
var i = 0
|
||||
while (i < length) {
|
||||
val sqlField = struct.fields(i)
|
||||
val avroField = avroSchema.getField(sqlField.name)
|
||||
if (avroField != null) {
|
||||
val converter = createConverter(avroField.schema(), sqlField.dataType,
|
||||
path :+ sqlField.name)
|
||||
converters(i) = converter
|
||||
avroFieldIndexes(i) = avroField.pos()
|
||||
} else if (!sqlField.nullable) {
|
||||
throw new IncompatibleSchemaException(
|
||||
s"Cannot find non-nullable field ${sqlField.name} at path ${path.mkString(".")} " +
|
||||
"in Avro schema\n" +
|
||||
s"Source Avro schema: $sourceAvroSchema.\n" +
|
||||
s"Target Catalyst type: $targetSqlType")
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
|
||||
(item: AnyRef) => {
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
val record = item.asInstanceOf[GenericRecord]
|
||||
|
||||
val result = new Array[Any](length)
|
||||
var i = 0
|
||||
while (i < converters.length) {
|
||||
if (converters(i) != null) {
|
||||
val converter = converters(i)
|
||||
result(i) = converter(record.get(avroFieldIndexes(i)))
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
new GenericRow(result)
|
||||
}
|
||||
}
|
||||
case (arrayType: ArrayType, ARRAY) =>
|
||||
val elementConverter = createConverter(avroSchema.getElementType, arrayType.elementType,
|
||||
path)
|
||||
val allowsNull = arrayType.containsNull
|
||||
(item: AnyRef) => {
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
item.asInstanceOf[java.lang.Iterable[AnyRef]].asScala.map { element =>
|
||||
if (element == null && !allowsNull) {
|
||||
throw new RuntimeException(s"Array value at path ${path.mkString(".")} is not " +
|
||||
"allowed to be null")
|
||||
} else {
|
||||
elementConverter(element)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
case (mapType: MapType, MAP) if mapType.keyType == StringType =>
|
||||
val valueConverter = createConverter(avroSchema.getValueType, mapType.valueType, path)
|
||||
val allowsNull = mapType.valueContainsNull
|
||||
(item: AnyRef) => {
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
item.asInstanceOf[java.util.Map[AnyRef, AnyRef]].asScala.map { x =>
|
||||
if (x._2 == null && !allowsNull) {
|
||||
throw new RuntimeException(s"Map value at path ${path.mkString(".")} is not " +
|
||||
"allowed to be null")
|
||||
} else {
|
||||
(x._1.toString, valueConverter(x._2))
|
||||
}
|
||||
}.toMap
|
||||
}
|
||||
}
|
||||
case (sqlType, UNION) =>
|
||||
if (avroSchema.getTypes.asScala.exists(_.getType == NULL)) {
|
||||
val remainingUnionTypes = avroSchema.getTypes.asScala.filterNot(_.getType == NULL)
|
||||
if (remainingUnionTypes.size == 1) {
|
||||
createConverter(remainingUnionTypes.head, sqlType, path)
|
||||
} else {
|
||||
createConverter(Schema.createUnion(remainingUnionTypes.asJava), sqlType, path)
|
||||
}
|
||||
} else avroSchema.getTypes.asScala.map(_.getType) match {
|
||||
case Seq(_) => createConverter(avroSchema.getTypes.get(0), sqlType, path)
|
||||
case Seq(a, b) if Set(a, b) == Set(INT, LONG) && sqlType == LongType =>
|
||||
(item: AnyRef) => {
|
||||
item match {
|
||||
case null => null
|
||||
case l: java.lang.Long => l
|
||||
case i: java.lang.Integer => new java.lang.Long(i.longValue())
|
||||
}
|
||||
}
|
||||
case Seq(a, b) if Set(a, b) == Set(FLOAT, DOUBLE) && sqlType == DoubleType =>
|
||||
(item: AnyRef) => {
|
||||
item match {
|
||||
case null => null
|
||||
case d: java.lang.Double => d
|
||||
case f: java.lang.Float => new java.lang.Double(f.doubleValue())
|
||||
}
|
||||
}
|
||||
case other =>
|
||||
sqlType match {
|
||||
case t: StructType if t.fields.length == avroSchema.getTypes.size =>
|
||||
val fieldConverters = t.fields.zip(avroSchema.getTypes.asScala).map {
|
||||
case (field, schema) =>
|
||||
createConverter(schema, field.dataType, path :+ field.name)
|
||||
}
|
||||
|
||||
(item: AnyRef) =>
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
val i = GenericData.get().resolveUnion(avroSchema, item)
|
||||
val converted = new Array[Any](fieldConverters.length)
|
||||
converted(i) = fieldConverters(i)(item)
|
||||
new GenericRow(converted)
|
||||
}
|
||||
case _ => throw new IncompatibleSchemaException(
|
||||
s"Cannot convert Avro schema to catalyst type because schema at path " +
|
||||
s"${path.mkString(".")} is not compatible " +
|
||||
s"(avroType = $other, sqlType = $sqlType). \n" +
|
||||
s"Source Avro schema: $sourceAvroSchema.\n" +
|
||||
s"Target Catalyst type: $targetSqlType")
|
||||
}
|
||||
}
|
||||
case (left, right) =>
|
||||
throw new IncompatibleSchemaException(
|
||||
s"Cannot convert Avro schema to catalyst type because schema at path " +
|
||||
s"${path.mkString(".")} is not compatible (avroType = $left, sqlType = $right). \n" +
|
||||
s"Source Avro schema: $sourceAvroSchema.\n" +
|
||||
s"Target Catalyst type: $targetSqlType")
|
||||
}
|
||||
}
|
||||
|
||||
createConverter(sourceAvroSchema, targetSqlType, List.empty[String])
|
||||
}
|
||||
|
||||
def createConverterToAvro(dataType: DataType,
|
||||
structName: String,
|
||||
recordNamespace: String): Any => Any = {
|
||||
dataType match {
|
||||
case BinaryType => (item: Any) =>
|
||||
item match {
|
||||
case null => null
|
||||
case bytes: Array[Byte] => ByteBuffer.wrap(bytes)
|
||||
}
|
||||
case IntegerType | LongType |
|
||||
FloatType | DoubleType | StringType | BooleanType => identity
|
||||
case ByteType => (item: Any) =>
|
||||
if (item == null) null else item.asInstanceOf[Byte].intValue
|
||||
case ShortType => (item: Any) =>
|
||||
if (item == null) null else item.asInstanceOf[Short].intValue
|
||||
case dec: DecimalType =>
|
||||
val schema = SchemaConverters.toAvroType(dec, nullable = false, structName, recordNamespace)
|
||||
(item: Any) => {
|
||||
Option(item).map { _ =>
|
||||
val bigDecimalValue = item.asInstanceOf[java.math.BigDecimal]
|
||||
val decimalConversions = new DecimalConversion()
|
||||
decimalConversions.toFixed(bigDecimalValue, schema, LogicalTypes.decimal(dec.precision, dec.scale))
|
||||
}.orNull
|
||||
}
|
||||
case TimestampType => (item: Any) =>
|
||||
// Convert time to microseconds since spark-avro by default converts TimestampType to
|
||||
// Avro Logical TimestampMicros
|
||||
Option(item).map(_.asInstanceOf[Timestamp].getTime * 1000).orNull
|
||||
case DateType => (item: Any) =>
|
||||
Option(item).map(_.asInstanceOf[Date].toLocalDate.toEpochDay.toInt).orNull
|
||||
case ArrayType(elementType, _) =>
|
||||
val elementConverter = createConverterToAvro(
|
||||
elementType,
|
||||
structName,
|
||||
recordNamespace)
|
||||
(item: Any) => {
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
val sourceArray = item.asInstanceOf[Seq[Any]]
|
||||
val sourceArraySize = sourceArray.size
|
||||
val targetList = new util.ArrayList[Any](sourceArraySize)
|
||||
var idx = 0
|
||||
while (idx < sourceArraySize) {
|
||||
targetList.add(elementConverter(sourceArray(idx)))
|
||||
idx += 1
|
||||
}
|
||||
targetList
|
||||
}
|
||||
}
|
||||
case MapType(StringType, valueType, _) =>
|
||||
val valueConverter = createConverterToAvro(
|
||||
valueType,
|
||||
structName,
|
||||
recordNamespace)
|
||||
(item: Any) => {
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
val javaMap = new util.HashMap[String, Any]()
|
||||
item.asInstanceOf[Map[String, Any]].foreach { case (key, value) =>
|
||||
javaMap.put(key, valueConverter(value))
|
||||
}
|
||||
javaMap
|
||||
}
|
||||
}
|
||||
case structType: StructType =>
|
||||
val schema: Schema = SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace)
|
||||
val childNameSpace = if (recordNamespace != "") s"$recordNamespace.$structName" else structName
|
||||
val fieldConverters = structType.fields.map(field =>
|
||||
createConverterToAvro(
|
||||
field.dataType,
|
||||
field.name,
|
||||
childNameSpace))
|
||||
(item: Any) => {
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
val record = new Record(schema)
|
||||
val convertersIterator = fieldConverters.iterator
|
||||
val fieldNamesIterator = dataType.asInstanceOf[StructType].fieldNames.iterator
|
||||
val rowIterator = item.asInstanceOf[Row].toSeq.iterator
|
||||
|
||||
while (convertersIterator.hasNext) {
|
||||
val converter = convertersIterator.next()
|
||||
record.put(fieldNamesIterator.next(), converter(rowIterator.next()))
|
||||
}
|
||||
record
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi
|
||||
|
||||
import org.apache.avro.Schema
|
||||
import org.apache.avro.generic.{GenericRecord, GenericRecordBuilder, IndexedRecord}
|
||||
import org.apache.hudi.avro.HoodieAvroUtils
|
||||
import org.apache.hudi.common.model.HoodieKey
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.avro.SchemaConverters
|
||||
import org.apache.spark.sql.catalyst.encoders.RowEncoder
|
||||
import org.apache.spark.sql.types.StructType
|
||||
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
object AvroConversionUtils {
|
||||
|
||||
def createRdd(df: DataFrame, structName: String, recordNamespace: String): RDD[GenericRecord] = {
|
||||
val avroSchema = convertStructTypeToAvroSchema(df.schema, structName, recordNamespace)
|
||||
createRdd(df, avroSchema, structName, recordNamespace)
|
||||
}
|
||||
|
||||
def createRdd(df: DataFrame, avroSchema: Schema, structName: String, recordNamespace: String)
|
||||
: RDD[GenericRecord] = {
|
||||
// Use the Avro schema to derive the StructType which has the correct nullability information
|
||||
val dataType = SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType]
|
||||
val encoder = RowEncoder.apply(dataType).resolveAndBind()
|
||||
df.queryExecution.toRdd.map(encoder.fromRow)
|
||||
.mapPartitions { records =>
|
||||
if (records.isEmpty) Iterator.empty
|
||||
else {
|
||||
val convertor = AvroConversionHelper.createConverterToAvro(dataType, structName, recordNamespace)
|
||||
records.map { x => convertor(x).asInstanceOf[GenericRecord] }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def createRddForDeletes(df: DataFrame, rowField: String, partitionField: String): RDD[HoodieKey] = {
|
||||
df.rdd.map(row => new HoodieKey(row.getAs[String](rowField), row.getAs[String](partitionField)))
|
||||
}
|
||||
|
||||
def createDataFrame(rdd: RDD[GenericRecord], schemaStr: String, ss: SparkSession): Dataset[Row] = {
|
||||
if (rdd.isEmpty()) {
|
||||
ss.emptyDataFrame
|
||||
} else {
|
||||
ss.createDataFrame(rdd.mapPartitions { records =>
|
||||
if (records.isEmpty) Iterator.empty
|
||||
else {
|
||||
val schema = new Schema.Parser().parse(schemaStr)
|
||||
val dataType = convertAvroSchemaToStructType(schema)
|
||||
val convertor = AvroConversionHelper.createConverterToRow(schema, dataType)
|
||||
records.map { x => convertor(x).asInstanceOf[Row] }
|
||||
}
|
||||
}, convertAvroSchemaToStructType(new Schema.Parser().parse(schemaStr)))
|
||||
}
|
||||
}
|
||||
|
||||
def convertStructTypeToAvroSchema(structType: StructType,
|
||||
structName: String,
|
||||
recordNamespace: String): Schema = {
|
||||
SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace)
|
||||
}
|
||||
|
||||
def convertAvroSchemaToStructType(avroSchema: Schema): StructType = {
|
||||
SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType]
|
||||
}
|
||||
|
||||
def buildAvroRecordBySchema(record: IndexedRecord,
|
||||
requiredSchema: Schema,
|
||||
requiredPos: List[Int],
|
||||
recordBuilder: GenericRecordBuilder): GenericRecord = {
|
||||
val requiredFields = requiredSchema.getFields.asScala
|
||||
assert(requiredFields.length == requiredPos.length)
|
||||
val positionIterator = requiredPos.iterator
|
||||
requiredFields.foreach(f => recordBuilder.set(f, record.get(positionIterator.next())))
|
||||
recordBuilder.build()
|
||||
}
|
||||
|
||||
def getAvroRecordNameAndNamespace(tableName: String): (String, String) = {
|
||||
val name = HoodieAvroUtils.sanitizeName(tableName)
|
||||
(s"${name}_record", s"hoodie.${name}")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.exception.HoodieKeyException;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static junit.framework.TestCase.assertEquals;
|
||||
|
||||
public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
private TypedProperties getCommonProps(boolean getComplexRecordKey) {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
if (getComplexRecordKey) {
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key, pii_col");
|
||||
} else {
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key");
|
||||
}
|
||||
properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY, "true");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesWithoutPartitionPathProp() {
|
||||
return getCommonProps(false);
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesWithoutRecordKeyProp() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getWrongRecordKeyFieldProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp");
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_wrong_key");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getProps() {
|
||||
TypedProperties properties = getCommonProps(true);
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp,ts_ms");
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullPartitionPathFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new ComplexKeyGenerator(getPropertiesWithoutPartitionPathProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullRecordKeyFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new ComplexKeyGenerator(getPropertiesWithoutRecordKeyProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWrongRecordKeyField() {
|
||||
ComplexKeyGenerator keyGenerator = new ComplexKeyGenerator(getWrongRecordKeyFieldProps());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHappyFlow() {
|
||||
ComplexKeyGenerator keyGenerator = new ComplexKeyGenerator(getProps());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686/ts_ms=2020-03-21");
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686/ts_ms=2020-03-21");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleValueKeyGenerator() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key");
|
||||
properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp");
|
||||
ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 1);
|
||||
assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 1);
|
||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||
GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
|
||||
String rowKey = record.get("_row_key").toString();
|
||||
String partitionPath = record.get("timestamp").toString();
|
||||
HoodieKey hoodieKey = compositeKeyGenerator.getKey(record);
|
||||
assertEquals("_row_key:" + rowKey, hoodieKey.getRecordKey());
|
||||
assertEquals(partitionPath, hoodieKey.getPartitionPath());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleValueKeyGenerator() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key,timestamp");
|
||||
properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "rider,driver");
|
||||
ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 2);
|
||||
assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 2);
|
||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||
GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
|
||||
String rowKey =
|
||||
"_row_key" + ComplexAvroKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("_row_key").toString() + ","
|
||||
+ "timestamp" + ComplexAvroKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("timestamp").toString();
|
||||
String partitionPath = record.get("rider").toString() + "/" + record.get("driver").toString();
|
||||
HoodieKey hoodieKey = compositeKeyGenerator.getKey(record);
|
||||
assertEquals(rowKey, hoodieKey.getRecordKey());
|
||||
assertEquals(partitionPath, hoodieKey.getPartitionPath());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleValueKeyGeneratorNonPartitioned() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key,timestamp");
|
||||
properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "");
|
||||
ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 2);
|
||||
assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 0);
|
||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||
GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
|
||||
String rowKey =
|
||||
"_row_key" + ComplexAvroKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("_row_key").toString() + ","
|
||||
+ "timestamp" + ComplexAvroKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("timestamp").toString();
|
||||
String partitionPath = "";
|
||||
HoodieKey hoodieKey = compositeKeyGenerator.getKey(record);
|
||||
assertEquals(rowKey, hoodieKey.getRecordKey());
|
||||
assertEquals(partitionPath, hoodieKey.getPartitionPath());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,224 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class TestCustomKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
private TypedProperties getCommonProps(boolean getComplexRecordKey) {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
if (getComplexRecordKey) {
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key, pii_col");
|
||||
} else {
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key");
|
||||
}
|
||||
properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY, "true");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesForSimpleKeyGen() {
|
||||
TypedProperties properties = getCommonProps(false);
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp:simple");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getImproperPartitionFieldFormatProp() {
|
||||
TypedProperties properties = getCommonProps(false);
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getInvalidPartitionKeyTypeProps() {
|
||||
TypedProperties properties = getCommonProps(false);
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp:dummy");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getComplexRecordKeyWithSimplePartitionProps() {
|
||||
TypedProperties properties = getCommonProps(true);
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp:simple");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getComplexRecordKeyAndPartitionPathProps() {
|
||||
TypedProperties properties = getCommonProps(true);
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp:simple,ts_ms:timestamp");
|
||||
populateNecessaryPropsForTimestampBasedKeyGen(properties);
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropsWithoutRecordKeyFieldProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp:simple");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private void populateNecessaryPropsForTimestampBasedKeyGen(TypedProperties properties) {
|
||||
properties.put("hoodie.deltastreamer.keygen.timebased.timestamp.type", "DATE_STRING");
|
||||
properties.put("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd");
|
||||
properties.put("hoodie.deltastreamer.keygen.timebased.output.dateformat", "yyyyMMdd");
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesForTimestampBasedKeyGen() {
|
||||
TypedProperties properties = getCommonProps(false);
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "ts_ms:timestamp");
|
||||
populateNecessaryPropsForTimestampBasedKeyGen(properties);
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesForNonPartitionedKeyGen() {
|
||||
TypedProperties properties = getCommonProps(false);
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "");
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleKeyGenerator() {
|
||||
BuiltinKeyGenerator keyGenerator = new CustomKeyGenerator(getPropertiesForSimpleKeyGen());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "key1");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTimestampBasedKeyGenerator() {
|
||||
BuiltinKeyGenerator keyGenerator = new CustomKeyGenerator(getPropertiesForTimestampBasedKeyGen());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "key1");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "ts_ms=20200321");
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "ts_ms=20200321");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonPartitionedKeyGenerator() {
|
||||
BuiltinKeyGenerator keyGenerator = new CustomKeyGenerator(getPropertiesForNonPartitionedKeyGen());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "key1");
|
||||
Assertions.assertTrue(key.getPartitionPath().isEmpty());
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
|
||||
Assertions.assertTrue(keyGenerator.getPartitionPath(row).isEmpty());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInvalidPartitionKeyType() {
|
||||
try {
|
||||
BuiltinKeyGenerator keyGenerator = new CustomKeyGenerator(getInvalidPartitionKeyTypeProps());
|
||||
keyGenerator.getKey(getRecord());
|
||||
Assertions.fail("should fail when invalid PartitionKeyType is provided!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("No enum constant org.apache.hudi.keygen.CustomAvroKeyGenerator.PartitionKeyType.DUMMY"));
|
||||
}
|
||||
|
||||
try {
|
||||
BuiltinKeyGenerator keyGenerator = new CustomKeyGenerator(getInvalidPartitionKeyTypeProps());
|
||||
GenericRecord record = getRecord();
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
keyGenerator.getPartitionPath(row);
|
||||
Assertions.fail("should fail when invalid PartitionKeyType is provided!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("No enum constant org.apache.hudi.keygen.CustomAvroKeyGenerator.PartitionKeyType.DUMMY"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoRecordKeyFieldProp() {
|
||||
try {
|
||||
BuiltinKeyGenerator keyGenerator = new CustomKeyGenerator(getPropsWithoutRecordKeyFieldProps());
|
||||
keyGenerator.getKey(getRecord());
|
||||
Assertions.fail("should fail when record key field is not provided!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("Property hoodie.datasource.write.recordkey.field not found"));
|
||||
}
|
||||
|
||||
try {
|
||||
BuiltinKeyGenerator keyGenerator = new CustomKeyGenerator(getPropsWithoutRecordKeyFieldProps());
|
||||
GenericRecord record = getRecord();
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
keyGenerator.getRecordKey(row);
|
||||
Assertions.fail("should fail when record key field is not provided!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("Property hoodie.datasource.write.recordkey.field not found"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPartitionFieldsInImproperFormat() {
|
||||
try {
|
||||
BuiltinKeyGenerator keyGenerator = new CustomKeyGenerator(getImproperPartitionFieldFormatProp());
|
||||
keyGenerator.getKey(getRecord());
|
||||
Assertions.fail("should fail when partition key field is provided in improper format!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("Unable to find field names for partition path in proper format"));
|
||||
}
|
||||
|
||||
try {
|
||||
BuiltinKeyGenerator keyGenerator = new CustomKeyGenerator(getImproperPartitionFieldFormatProp());
|
||||
GenericRecord record = getRecord();
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
keyGenerator.getPartitionPath(row);
|
||||
Assertions.fail("should fail when partition key field is provided in improper format!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("Unable to find field names for partition path in proper format"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexRecordKeyWithSimplePartitionPath() {
|
||||
BuiltinKeyGenerator keyGenerator = new CustomKeyGenerator(getComplexRecordKeyWithSimplePartitionProps());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexRecordKeysWithComplexPartitionPath() {
|
||||
BuiltinKeyGenerator keyGenerator = new CustomKeyGenerator(getComplexRecordKeyAndPartitionPathProps());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686/ts_ms=20200321");
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686/ts_ms=20200321");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.exception.HoodieKeyException;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class TestGlobalDeleteKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
private TypedProperties getCommonProps(boolean getComplexRecordKey) {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
if (getComplexRecordKey) {
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key,pii_col");
|
||||
} else {
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key");
|
||||
}
|
||||
properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY, "true");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesWithoutRecordKeyProp() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getWrongRecordKeyFieldProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_wrong_key");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getProps() {
|
||||
TypedProperties properties = getCommonProps(true);
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp,ts_ms");
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullRecordKeyFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new GlobalDeleteKeyGenerator(getPropertiesWithoutRecordKeyProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWrongRecordKeyField() {
|
||||
GlobalDeleteKeyGenerator keyGenerator = new GlobalDeleteKeyGenerator(getWrongRecordKeyFieldProps());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHappyFlow() {
|
||||
GlobalDeleteKeyGenerator keyGenerator = new GlobalDeleteKeyGenerator(getProps());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "");
|
||||
keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType);
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,124 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.exception.HoodieKeyException;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class TestSimpleKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
private TypedProperties getCommonProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key");
|
||||
properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY, "true");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesWithoutPartitionPathProp() {
|
||||
return getCommonProps();
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesWithoutRecordKeyProp() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getWrongRecordKeyFieldProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp");
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_wrong_key");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getWrongPartitionPathFieldProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "_wrong_partition_path");
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getComplexRecordKeyProp() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp");
|
||||
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key,pii_col");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getProps() {
|
||||
TypedProperties properties = getCommonProps();
|
||||
properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp");
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullPartitionPathFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new SimpleKeyGenerator(getPropertiesWithoutPartitionPathProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullRecordKeyFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new SimpleKeyGenerator(getPropertiesWithoutRecordKeyProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWrongRecordKeyField() {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getWrongRecordKeyFieldProps());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWrongPartitionPathField() {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getWrongPartitionPathFieldProps());
|
||||
GenericRecord record = getRecord();
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(record), KeyGenUtils.DEFAULT_PARTITION_PATH);
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(KeyGeneratorTestUtilities.getRow(record)),
|
||||
KeyGenUtils.DEFAULT_PARTITION_PATH);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexRecordKeyField() {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getComplexRecordKeyProp());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHappyFlow() {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getProps());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(getRecord());
|
||||
Assertions.assertEquals(key.getRecordKey(), "key1");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,366 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.hudi.AvroConversionHelper;
|
||||
import org.apache.hudi.AvroConversionUtils;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.testutils.SchemaTestUtil;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.exception.HoodieKeyGeneratorException;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import scala.Function1;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class TestTimestampBasedKeyGenerator {
|
||||
|
||||
private GenericRecord baseRecord;
|
||||
private TypedProperties properties = new TypedProperties();
|
||||
|
||||
private Schema schema;
|
||||
private StructType structType;
|
||||
private Row baseRow;
|
||||
|
||||
@BeforeEach
|
||||
public void initialize() throws IOException {
|
||||
schema = SchemaTestUtil.getTimestampEvolvedSchema();
|
||||
structType = AvroConversionUtils.convertAvroSchemaToStructType(schema);
|
||||
baseRecord = SchemaTestUtil
|
||||
.generateAvroRecordFromJson(schema, 1, "001", "f1");
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
|
||||
properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "field1");
|
||||
properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY, "createTime");
|
||||
properties.setProperty(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY, "false");
|
||||
}
|
||||
|
||||
private TypedProperties getBaseKeyConfig(String timestampType, String dateFormat, String timezone, String scalarType) {
|
||||
properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_TYPE_FIELD_PROP, timestampType);
|
||||
properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, dateFormat);
|
||||
properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, timezone);
|
||||
|
||||
if (scalarType != null) {
|
||||
properties.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.scalar.time.unit", scalarType);
|
||||
}
|
||||
|
||||
return properties;
|
||||
}
|
||||
|
||||
private Row genericRecordToRow(GenericRecord baseRecord) {
|
||||
Function1<Object, Object> convertor = AvroConversionHelper.createConverterToRow(schema, structType);
|
||||
Row row = (Row) convertor.apply(baseRecord);
|
||||
int fieldCount = structType.fieldNames().length;
|
||||
Object[] values = new Object[fieldCount];
|
||||
for (int i = 0; i < fieldCount; i++) {
|
||||
values[i] = row.get(i);
|
||||
}
|
||||
return new GenericRowWithSchema(values, structType);
|
||||
}
|
||||
|
||||
private TypedProperties getBaseKeyConfig(String timestampType, String inputFormatList, String inputFormatDelimiterRegex, String inputTimezone, String outputFormat, String outputTimezone) {
|
||||
if (timestampType != null) {
|
||||
properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_TYPE_FIELD_PROP, timestampType);
|
||||
}
|
||||
if (inputFormatList != null) {
|
||||
properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, inputFormatList);
|
||||
}
|
||||
if (inputFormatDelimiterRegex != null) {
|
||||
properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP, inputFormatDelimiterRegex);
|
||||
}
|
||||
if (inputTimezone != null) {
|
||||
properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP, inputTimezone);
|
||||
}
|
||||
if (outputFormat != null) {
|
||||
properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, outputFormat);
|
||||
}
|
||||
if (outputTimezone != null) {
|
||||
properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, outputTimezone);
|
||||
}
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTimestampBasedKeyGenerator() throws IOException {
|
||||
// timezone is GMT+8:00
|
||||
baseRecord.put("createTime", 1578283932000L);
|
||||
properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00", null);
|
||||
TimestampBasedKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
assertEquals("2020-01-06 12", hk1.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timezone is GMT
|
||||
properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT", null);
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk2 = keyGen.getKey(baseRecord);
|
||||
assertEquals("2020-01-06 04", hk2.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
assertEquals("2020-01-06 04", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timestamp is DATE_STRING, timezone is GMT+8:00
|
||||
baseRecord.put("createTime", "2020-01-06 12:12:12");
|
||||
properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT+8:00", null);
|
||||
properties.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd hh:mm:ss");
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk3 = keyGen.getKey(baseRecord);
|
||||
assertEquals("2020-01-06 12", hk3.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timezone is GMT
|
||||
properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT", null);
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk4 = keyGen.getKey(baseRecord);
|
||||
assertEquals("2020-01-06 12", hk4.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timezone is GMT+8:00, createTime is null
|
||||
baseRecord.put("createTime", null);
|
||||
properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00", null);
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk5 = keyGen.getKey(baseRecord);
|
||||
assertEquals("1970-01-01 08", hk5.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("1970-01-01 08", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timestamp is DATE_STRING, timezone is GMT, createTime is null
|
||||
baseRecord.put("createTime", null);
|
||||
properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh:mm:ss", "GMT", null);
|
||||
properties.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd hh:mm:ss");
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk6 = keyGen.getKey(baseRecord);
|
||||
assertEquals("1970-01-01 12:00:00", hk6.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("1970-01-01 12:00:00", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testScalar() throws IOException {
|
||||
// timezone is GMT+8:00
|
||||
baseRecord.put("createTime", 20000L);
|
||||
|
||||
// timezone is GMT
|
||||
properties = getBaseKeyConfig("SCALAR", "yyyy-MM-dd hh", "GMT", "days");
|
||||
TimestampBasedKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
assertEquals(hk1.getPartitionPath(), "2024-10-04 12");
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2024-10-04 12", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timezone is GMT, createTime is null
|
||||
baseRecord.put("createTime", null);
|
||||
properties = getBaseKeyConfig("SCALAR", "yyyy-MM-dd hh", "GMT", "days");
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk2 = keyGen.getKey(baseRecord);
|
||||
assertEquals("1970-01-02 12", hk2.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("1970-01-02 12", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_SingleInputFormat_ISO8601WithMsZ_OutputTimezoneAsUTC() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33.428Z");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"GMT");
|
||||
BuiltinKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_SingleInputFormats_ISO8601WithMsZ_OutputTimezoneAsInputDateTimeZone() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33.428Z");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"");
|
||||
BuiltinKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsZ_OutputTimezoneAsUTC() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33.428Z");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"UTC");
|
||||
BuiltinKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ISO8601NoMsZ_OutputTimezoneAsUTC() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33Z");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"UTC");
|
||||
BuiltinKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ISO8601NoMsWithOffset_OutputTimezoneAsUTC() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33-05:00");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"UTC");
|
||||
BuiltinKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040118", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040118", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsWithOffset_OutputTimezoneAsUTC() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33.123-05:00");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"UTC");
|
||||
BuiltinKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040118", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040118", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsZ_OutputTimezoneAsEST() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33.123Z");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"EST");
|
||||
BuiltinKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040109", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040109", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_Throws_MultipleInputFormats_InputDateNotMatchingFormats() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01 13:01:33.123-05:00");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"UTC");
|
||||
BuiltinKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
Assertions.assertThrows(HoodieKeyGeneratorException.class, () -> keyGen.getKey(baseRecord));
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
Assertions.assertThrows(HoodieKeyGeneratorException.class, () -> keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ShortDate_OutputCustomDate() throws IOException {
|
||||
baseRecord.put("createTime", "20200401");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ,yyyyMMdd",
|
||||
"",
|
||||
"UTC",
|
||||
"MM/dd/yyyy",
|
||||
"UTC");
|
||||
BuiltinKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("04/01/2020", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("04/01/2020", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.testutils;
|
||||
|
||||
import org.apache.hudi.AvroConversionHelper;
|
||||
import org.apache.hudi.AvroConversionUtils;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
|
||||
import scala.Function1;
|
||||
|
||||
public class KeyGeneratorTestUtilities {
|
||||
|
||||
public static String exampleSchema = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ "
|
||||
+ "{\"name\": \"timestamp\",\"type\": \"long\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"ts_ms\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"pii_col\", \"type\": \"string\"}]}";
|
||||
|
||||
public static final String TEST_STRUCTNAME = "test_struct_name";
|
||||
public static final String TEST_RECORD_NAMESPACE = "test_record_namespace";
|
||||
public static Schema schema = new Schema.Parser().parse(exampleSchema);
|
||||
public static StructType structType = AvroConversionUtils.convertAvroSchemaToStructType(schema);
|
||||
|
||||
public GenericRecord getRecord() {
|
||||
GenericRecord record = new GenericData.Record(new Schema.Parser().parse(exampleSchema));
|
||||
record.put("timestamp", 4357686);
|
||||
record.put("_row_key", "key1");
|
||||
record.put("ts_ms", "2020-03-21");
|
||||
record.put("pii_col", "pi");
|
||||
return record;
|
||||
}
|
||||
|
||||
public static Row getRow(GenericRecord record) {
|
||||
return getRow(record, schema, structType);
|
||||
}
|
||||
|
||||
public static Row getRow(GenericRecord record, Schema schema, StructType structType) {
|
||||
Function1<Object, Object> converterFn = AvroConversionHelper.createConverterToRow(schema, structType);
|
||||
Row row = (Row) converterFn.apply(record);
|
||||
int fieldCount = structType.fieldNames().length;
|
||||
Object[] values = new Object[fieldCount];
|
||||
for (int i = 0; i < fieldCount; i++) {
|
||||
values[i] = row.get(i);
|
||||
}
|
||||
return new GenericRowWithSchema(values, structType);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user