[HUDI-912] Refactor and relocate KeyGenerator to support more engines (#2200)

* [HUDI-912] Refactor and relocate KeyGenerator to support more engines * Rename KeyGenerators
2020-11-03 05:12:51 +08:00
parent 59f995a3f5
commit d160abb437
38 changed files with 1038 additions and 524 deletions
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.ApiMaturityLevel;
+import org.apache.hudi.AvroConversionHelper;
+import org.apache.hudi.PublicAPIMethod;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.types.StructType;
+import scala.Function1;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Base class for the built-in key generators. Contains methods structured for
+ * code reuse amongst them.
+ */
+public abstract class BuiltinKeyGenerator extends BaseKeyGenerator implements SparkKeyGeneratorInterface {
+
+  private static final String STRUCT_NAME = "hoodieRowTopLevelField";
+  private static final String NAMESPACE = "hoodieRow";
+  private transient Function1<Object, Object> converterFn = null;
+  protected StructType structType;
+
+  protected Map<String, List<Integer>> recordKeyPositions = new HashMap<>();
+  protected Map<String, List<Integer>> partitionPathPositions = new HashMap<>();
+
+  protected BuiltinKeyGenerator(TypedProperties config) {
+    super(config);
+  }
+
+  /**
+   * Fetch record key from {@link Row}.
+   * @param row instance of {@link Row} from which record key is requested.
+   * @return the record key of interest from {@link Row}.
+   */
+  @Override
+  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
+  public String getRecordKey(Row row) {
+    if (null == converterFn) {
+      converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE);
+    }
+    GenericRecord genericRecord = (GenericRecord) converterFn.apply(row);
+    return getKey(genericRecord).getRecordKey();
+  }
+
+  /**
+   * Fetch partition path from {@link Row}.
+   * @param row instance of {@link Row} from which partition path is requested
+   * @return the partition path of interest from {@link Row}.
+   */
+  @Override
+  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
+  public String getPartitionPath(Row row) {
+    if (null == converterFn) {
+      converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE);
+    }
+    GenericRecord genericRecord = (GenericRecord) converterFn.apply(row);
+    return getKey(genericRecord).getPartitionPath();
+  }
+
+  void buildFieldPositionMapIfNeeded(StructType structType) {
+    if (this.structType == null) {
+      // parse simple fields
+      getRecordKeyFields().stream()
+          .filter(f -> !(f.contains(".")))
+          .forEach(f -> {
+            if (structType.getFieldIndex(f).isDefined()) {
+              recordKeyPositions.put(f, Collections.singletonList((Integer) (structType.getFieldIndex(f).get())));
+            } else {
+              throw new HoodieKeyException("recordKey value not found for field: \"" + f + "\"");
+            }
+          });
+      // parse nested fields
+      getRecordKeyFields().stream()
+          .filter(f -> f.contains("."))
+          .forEach(f -> recordKeyPositions.put(f, RowKeyGeneratorHelper.getNestedFieldIndices(structType, f, true)));
+      // parse simple fields
+      if (getPartitionPathFields() != null) {
+        getPartitionPathFields().stream().filter(f -> !f.isEmpty()).filter(f -> !(f.contains(".")))
+            .forEach(f -> {
+              if (structType.getFieldIndex(f).isDefined()) {
+                partitionPathPositions.put(f,
+                    Collections.singletonList((Integer) (structType.getFieldIndex(f).get())));
+              } else {
+                partitionPathPositions.put(f, Collections.singletonList(-1));
+              }
+            });
+        // parse nested fields
+        getPartitionPathFields().stream().filter(f -> !f.isEmpty()).filter(f -> f.contains("."))
+            .forEach(f -> partitionPathPositions.put(f,
+                RowKeyGeneratorHelper.getNestedFieldIndices(structType, f, false)));
+      }
+      this.structType = structType;
+    }
+  }
+}
+
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
+import org.apache.spark.sql.Row;
+
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+/**
+ * Complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
+ */
+public class ComplexKeyGenerator extends BuiltinKeyGenerator {
+
+  private final ComplexAvroKeyGenerator complexAvroKeyGenerator;
+
+  public ComplexKeyGenerator(TypedProperties props) {
+    super(props);
+    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY)
+        .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
+    this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY)
+        .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
+    complexAvroKeyGenerator = new ComplexAvroKeyGenerator(props);
+  }
+
+  @Override
+  public String getRecordKey(GenericRecord record) {
+    return complexAvroKeyGenerator.getRecordKey(record);
+  }
+
+  @Override
+  public String getPartitionPath(GenericRecord record) {
+    return complexAvroKeyGenerator.getPartitionPath(record);
+  }
+
+  @Override
+  public String getRecordKey(Row row) {
+    buildFieldPositionMapIfNeeded(row.schema());
+    return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, true);
+  }
+
+  @Override
+  public String getPartitionPath(Row row) {
+    buildFieldPositionMapIfNeeded(row.schema());
+    return RowKeyGeneratorHelper.getPartitionPathFromRow(row, getPartitionPathFields(),
+        hiveStylePartitioning, partitionPathPositions);
+  }
+
+}
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.hudi.exception.HoodieKeyGeneratorException;
+import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
+import org.apache.spark.sql.Row;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+/**
+ * This is a generic implementation of KeyGenerator where users can configure record key as a single field or a combination of fields. Similarly partition path can be configured to have multiple
+ * fields or only one field. This class expects value for prop "hoodie.datasource.write.partitionpath.field" in a specific format. For example:
+ *
+ * properties.put("hoodie.datasource.write.partitionpath.field", "field1:PartitionKeyType1,field2:PartitionKeyType2").
+ *
+ * The complete partition path is created as <value for field1 basis PartitionKeyType1>/<value for field2 basis PartitionKeyType2> and so on.
+ *
+ * Few points to consider: 1. If you want to customize some partition path field on a timestamp basis, you can use field1:timestampBased 2. If you simply want to have the value of your configured
+ * field in the partition path, use field1:simple 3. If you want your table to be non partitioned, simply leave it as blank.
+ *
+ * RecordKey is internally generated using either SimpleKeyGenerator or ComplexKeyGenerator.
+ */
+public class CustomKeyGenerator extends BuiltinKeyGenerator {
+
+  private final CustomAvroKeyGenerator customAvroKeyGenerator;
+
+  public CustomKeyGenerator(TypedProperties props) {
+    super(props);
+    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY).split(",")).map(String::trim).collect(Collectors.toList());
+    this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY).split(",")).map(String::trim).collect(Collectors.toList());
+    customAvroKeyGenerator = new CustomAvroKeyGenerator(props);
+  }
+
+  @Override
+  public String getRecordKey(GenericRecord record) {
+    return customAvroKeyGenerator.getRecordKey(record);
+  }
+
+  @Override
+  public String getPartitionPath(GenericRecord record) {
+    return customAvroKeyGenerator.getPartitionPath(record);
+  }
+
+  @Override
+  public String getRecordKey(Row row) {
+    validateRecordKeyFields();
+    return getRecordKeyFields().size() == 1
+        ? new SimpleKeyGenerator(config).getRecordKey(row)
+        : new ComplexKeyGenerator(config).getRecordKey(row);
+  }
+
+  @Override
+  public String getPartitionPath(Row row) {
+    return getPartitionPath(Option.empty(), Option.of(row));
+  }
+
+  private String getPartitionPath(Option<GenericRecord> record, Option<Row> row) {
+    if (getPartitionPathFields() == null) {
+      throw new HoodieKeyException("Unable to find field names for partition path in cfg");
+    }
+
+    String partitionPathField;
+    StringBuilder partitionPath = new StringBuilder();
+
+    //Corresponds to no partition case
+    if (getPartitionPathFields().size() == 1 && getPartitionPathFields().get(0).isEmpty()) {
+      return "";
+    }
+    for (String field : getPartitionPathFields()) {
+      String[] fieldWithType = field.split(customAvroKeyGenerator.getSplitRegex());
+      if (fieldWithType.length != 2) {
+        throw new HoodieKeyGeneratorException("Unable to find field names for partition path in proper format");
+      }
+
+      partitionPathField = fieldWithType[0];
+      CustomAvroKeyGenerator.PartitionKeyType keyType = CustomAvroKeyGenerator.PartitionKeyType.valueOf(fieldWithType[1].toUpperCase());
+      switch (keyType) {
+        case SIMPLE:
+          if (record.isPresent()) {
+            partitionPath.append(new SimpleKeyGenerator(config, partitionPathField).getPartitionPath(record.get()));
+          } else {
+            partitionPath.append(new SimpleKeyGenerator(config, partitionPathField).getPartitionPath(row.get()));
+          }
+          break;
+        case TIMESTAMP:
+          try {
+            if (record.isPresent()) {
+              partitionPath.append(new TimestampBasedKeyGenerator(config, partitionPathField).getPartitionPath(record.get()));
+            } else {
+              partitionPath.append(new TimestampBasedKeyGenerator(config, partitionPathField).getPartitionPath(row.get()));
+            }
+          } catch (IOException ioe) {
+            throw new HoodieKeyGeneratorException("Unable to initialise TimestampBasedKeyGenerator class");
+          }
+          break;
+        default:
+          throw new HoodieKeyGeneratorException("Please provide valid PartitionKeyType with fields! You provided: " + keyType);
+      }
+
+      partitionPath.append(customAvroKeyGenerator.getDefaultPartitionPathSeparator());
+    }
+    partitionPath.deleteCharAt(partitionPath.length() - 1);
+    return partitionPath.toString();
+  }
+
+  private void validateRecordKeyFields() {
+    if (getRecordKeyFields() == null || getRecordKeyFields().isEmpty()) {
+      throw new HoodieKeyException("Unable to find field names for record key in cfg");
+    }
+  }
+}
+
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
+import org.apache.spark.sql.Row;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Key generator for deletes using global indices. Global index deletes do not require partition value so this key generator
+ * avoids using partition value for generating HoodieKey.
+ */
+public class GlobalDeleteKeyGenerator extends BuiltinKeyGenerator {
+
+  private final GlobalAvroDeleteKeyGenerator globalAvroDeleteKeyGenerator;
+  public GlobalDeleteKeyGenerator(TypedProperties config) {
+    super(config);
+    this.recordKeyFields = Arrays.asList(config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY).split(","));
+    globalAvroDeleteKeyGenerator = new GlobalAvroDeleteKeyGenerator(config);
+  }
+
+  @Override
+  public String getRecordKey(GenericRecord record) {
+    return globalAvroDeleteKeyGenerator.getRecordKey(record);
+  }
+
+  @Override
+  public String getPartitionPath(GenericRecord record) {
+    return globalAvroDeleteKeyGenerator.getPartitionPath(record);
+  }
+
+  @Override
+  public List<String> getPartitionPathFields() {
+    return new ArrayList<>();
+  }
+
+  @Override
+  public String getRecordKey(Row row) {
+    buildFieldPositionMapIfNeeded(row.schema());
+    return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, true);
+  }
+
+  @Override
+  public String getPartitionPath(Row row) {
+    return globalAvroDeleteKeyGenerator.getEmptyPartition();
+  }
+}
+
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.spark.sql.Row;
+
+import java.util.List;
+
+/**
+ * Simple Key generator for unpartitioned Hive Tables.
+ */
+public class NonpartitionedKeyGenerator extends SimpleKeyGenerator {
+
+  private final NonpartitionedAvroKeyGenerator nonpartitionedAvroKeyGenerator;
+
+  public NonpartitionedKeyGenerator(TypedProperties config) {
+    super(config);
+    nonpartitionedAvroKeyGenerator = new NonpartitionedAvroKeyGenerator(config);
+  }
+
+  @Override
+  public String getPartitionPath(GenericRecord record) {
+    return nonpartitionedAvroKeyGenerator.getPartitionPath(record);
+  }
+
+  @Override
+  public List<String> getPartitionPathFields() {
+    return nonpartitionedAvroKeyGenerator.getPartitionPathFields();
+  }
+
+  @Override
+  public String getPartitionPath(Row row) {
+    return nonpartitionedAvroKeyGenerator.getEmptyPartition();
+  }
+
+}
+
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import scala.Option;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH;
+import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH_SEPARATOR;
+import static org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER;
+import static org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER;
+
+/**
+ * Helper class to fetch fields from Row.
+ */
+public class RowKeyGeneratorHelper {
+
+  /**
+   * Generates record key for the corresponding {@link Row}.
+   * @param row instance of {@link Row} of interest
+   * @param recordKeyFields record key fields as a list
+   * @param recordKeyPositions record key positions for the corresponding record keys in {@code recordKeyFields}
+   * @param prefixFieldName {@code true} if field name need to be prefixed in the returned result. {@code false} otherwise.
+   * @return the record key thus generated
+   */
+  public static String getRecordKeyFromRow(Row row, List<String> recordKeyFields, Map<String, List<Integer>> recordKeyPositions, boolean prefixFieldName) {
+    AtomicBoolean keyIsNullOrEmpty = new AtomicBoolean(true);
+    String toReturn = recordKeyFields.stream().map(field -> {
+      String val = null;
+      List<Integer> fieldPositions = recordKeyPositions.get(field);
+      if (fieldPositions.size() == 1) { // simple field
+        Integer fieldPos = fieldPositions.get(0);
+        if (row.isNullAt(fieldPos)) {
+          val = NULL_RECORDKEY_PLACEHOLDER;
+        } else {
+          val = row.getAs(field).toString();
+          if (val.isEmpty()) {
+            val = EMPTY_RECORDKEY_PLACEHOLDER;
+          } else {
+            keyIsNullOrEmpty.set(false);
+          }
+        }
+      } else { // nested fields
+        val = getNestedFieldVal(row, recordKeyPositions.get(field)).toString();
+        if (!val.contains(NULL_RECORDKEY_PLACEHOLDER) && !val.contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
+          keyIsNullOrEmpty.set(false);
+        }
+      }
+      return prefixFieldName ? (field + ":" + val) : val;
+    }).collect(Collectors.joining(","));
+    if (keyIsNullOrEmpty.get()) {
+      throw new HoodieKeyException("recordKey value: \"" + toReturn + "\" for fields: \"" + Arrays.toString(recordKeyFields.toArray()) + "\" cannot be null or empty.");
+    }
+    return toReturn;
+  }
+
+  /**
+   * Generates partition path for the corresponding {@link Row}.
+   * @param row instance of {@link Row} of interest
+   * @param partitionPathFields partition path fields as a list
+   * @param hiveStylePartitioning {@code true} if hive style partitioning is set. {@code false} otherwise
+   * @param partitionPathPositions partition path positions for the corresponding fields in {@code partitionPathFields}
+   * @return the generated partition path for the row
+   */
+  public static String getPartitionPathFromRow(Row row, List<String> partitionPathFields, boolean hiveStylePartitioning, Map<String, List<Integer>> partitionPathPositions) {
+    return IntStream.range(0, partitionPathFields.size()).mapToObj(idx -> {
+      String field = partitionPathFields.get(idx);
+      String val = null;
+      List<Integer> fieldPositions = partitionPathPositions.get(field);
+      if (fieldPositions.size() == 1) { // simple
+        Integer fieldPos = fieldPositions.get(0);
+        // for partition path, if field is not found, index will be set to -1
+        if (fieldPos == -1 || row.isNullAt(fieldPos)) {
+          val = DEFAULT_PARTITION_PATH;
+        } else {
+          val = row.getAs(field).toString();
+          if (val.isEmpty()) {
+            val = DEFAULT_PARTITION_PATH;
+          }
+        }
+        if (hiveStylePartitioning) {
+          val = field + "=" + val;
+        }
+      } else { // nested
+        Object nestedVal = getNestedFieldVal(row, partitionPathPositions.get(field));
+        if (nestedVal.toString().contains(NULL_RECORDKEY_PLACEHOLDER) || nestedVal.toString().contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
+          val = hiveStylePartitioning ? field + "=" + DEFAULT_PARTITION_PATH : DEFAULT_PARTITION_PATH;
+        } else {
+          val = hiveStylePartitioning ? field + "=" + nestedVal.toString() : nestedVal.toString();
+        }
+      }
+      return val;
+    }).collect(Collectors.joining(DEFAULT_PARTITION_PATH_SEPARATOR));
+  }
+
+  /**
+   * Fetch the field value located at the positions requested for.
+   * @param row instance of {@link Row} of interest
+   * @param positions tree style positions where the leaf node need to be fetched and returned
+   * @return the field value as per the positions requested for.
+   */
+  public static Object getNestedFieldVal(Row row, List<Integer> positions) {
+    if (positions.size() == 1 && positions.get(0) == -1) {
+      return DEFAULT_PARTITION_PATH;
+    }
+    int index = 0;
+    int totalCount = positions.size();
+    Row valueToProcess = row;
+    Object toReturn = null;
+
+    while (index < totalCount) {
+      if (index < totalCount - 1) {
+        if (valueToProcess.isNullAt(positions.get(index))) {
+          toReturn = NULL_RECORDKEY_PLACEHOLDER;
+          break;
+        }
+        valueToProcess = (Row) valueToProcess.get(positions.get(index));
+      } else { // last index
+        if (null != valueToProcess.getAs(positions.get(index)) && valueToProcess.getAs(positions.get(index)).toString().isEmpty()) {
+          toReturn = EMPTY_RECORDKEY_PLACEHOLDER;
+          break;
+        }
+        toReturn = valueToProcess.getAs(positions.get(index));
+      }
+      index++;
+    }
+    return toReturn;
+  }
+
+  /**
+   * Generate the tree style positions for the field requested for as per the defined struct type.
+   * @param structType schema of interest
+   * @param field field of interest for which the positions are requested for
+   * @param isRecordKey {@code true} if the field requested for is a record key. {@code false} incase of a partition path.
+   * @return the positions of the field as per the struct type.
+   */
+  public static List<Integer> getNestedFieldIndices(StructType structType, String field, boolean isRecordKey) {
+    String[] slices = field.split("\\.");
+    List<Integer> positions = new ArrayList<>();
+    int index = 0;
+    int totalCount = slices.length;
+    while (index < totalCount) {
+      String slice = slices[index];
+      Option<Object> curIndexOpt = structType.getFieldIndex(slice);
+      if (curIndexOpt.isDefined()) {
+        int curIndex = (int) curIndexOpt.get();
+        positions.add(curIndex);
+        final StructField nestedField = structType.fields()[curIndex];
+        if (index < totalCount - 1) {
+          if (!(nestedField.dataType() instanceof StructType)) {
+            if (isRecordKey) {
+              throw new HoodieKeyException("Nested field should be of type StructType " + nestedField);
+            } else {
+              positions = Collections.singletonList(-1);
+              break;
+            }
+          }
+          structType = (StructType) nestedField.dataType();
+        }
+      } else {
+        if (isRecordKey) {
+          throw new HoodieKeyException("Can't find " + slice + " in StructType for the field " + field);
+        } else {
+          positions = Collections.singletonList(-1);
+          break;
+        }
+      }
+      index++;
+    }
+    return positions;
+  }
+}
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
+import org.apache.spark.sql.Row;
+
+import java.util.Collections;
+
+/**
+ * Simple key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
+ */
+public class SimpleKeyGenerator extends BuiltinKeyGenerator {
+
+  private final SimpleAvroKeyGenerator simpleAvroKeyGenerator;
+
+  public SimpleKeyGenerator(TypedProperties props) {
+    this(props, props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY),
+        props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY));
+  }
+
+  SimpleKeyGenerator(TypedProperties props, String partitionPathField) {
+    this(props, null, partitionPathField);
+  }
+
+  SimpleKeyGenerator(TypedProperties props, String recordKeyField, String partitionPathField) {
+    super(props);
+    this.recordKeyFields = recordKeyField == null
+        ? Collections.emptyList()
+        : Collections.singletonList(recordKeyField);
+    this.partitionPathFields = Collections.singletonList(partitionPathField);
+    simpleAvroKeyGenerator = new SimpleAvroKeyGenerator(props, recordKeyField, partitionPathField);
+  }
+
+  @Override
+  public String getRecordKey(GenericRecord record) {
+    return simpleAvroKeyGenerator.getRecordKey(record);
+  }
+
+  @Override
+  public String getPartitionPath(GenericRecord record) {
+    return simpleAvroKeyGenerator.getPartitionPath(record);
+  }
+
+  @Override
+  public String getRecordKey(Row row) {
+    buildFieldPositionMapIfNeeded(row.schema());
+    return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, false);
+  }
+
+  @Override
+  public String getPartitionPath(Row row) {
+    buildFieldPositionMapIfNeeded(row.schema());
+    return RowKeyGeneratorHelper.getPartitionPathFromRow(row, getPartitionPathFields(),
+        hiveStylePartitioning, partitionPathPositions);
+  }
+}
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.exception.HoodieKeyGeneratorException;
+import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
+import org.apache.spark.sql.Row;
+
+import java.io.IOException;
+
+import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH;
+import static org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER;
+import static org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER;
+
+/**
+ * Key generator, that relies on timestamps for partitioning field. Still picks record key by name.
+ */
+public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
+
+  private final TimestampBasedAvroKeyGenerator timestampBasedAvroKeyGenerator;
+
+  public TimestampBasedKeyGenerator(TypedProperties config) throws IOException {
+    this(config, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY),
+        config.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY));
+  }
+
+  TimestampBasedKeyGenerator(TypedProperties config, String partitionPathField) throws IOException {
+    this(config, null, partitionPathField);
+  }
+
+  TimestampBasedKeyGenerator(TypedProperties config, String recordKeyField, String partitionPathField) throws IOException {
+    super(config, recordKeyField, partitionPathField);
+    timestampBasedAvroKeyGenerator = new TimestampBasedAvroKeyGenerator(config, recordKeyField, partitionPathField);
+  }
+
+  @Override
+  public String getPartitionPath(GenericRecord record) {
+    return timestampBasedAvroKeyGenerator.getPartitionPath(record);
+  }
+
+  @Override
+  public String getRecordKey(Row row) {
+    buildFieldPositionMapIfNeeded(row.schema());
+    return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, false);
+  }
+
+  @Override
+  public String getPartitionPath(Row row) {
+    Object fieldVal = null;
+    buildFieldPositionMapIfNeeded(row.schema());
+    Object partitionPathFieldVal = RowKeyGeneratorHelper.getNestedFieldVal(row, partitionPathPositions.get(getPartitionPathFields().get(0)));
+    try {
+      if (partitionPathFieldVal == null || partitionPathFieldVal.toString().contains(DEFAULT_PARTITION_PATH) || partitionPathFieldVal.toString().contains(NULL_RECORDKEY_PLACEHOLDER)
+          || partitionPathFieldVal.toString().contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
+        fieldVal = timestampBasedAvroKeyGenerator.getDefaultPartitionVal();
+      } else {
+        fieldVal = partitionPathFieldVal;
+      }
+      return timestampBasedAvroKeyGenerator.getPartitionPath(fieldVal);
+    } catch (Exception e) {
+      throw new HoodieKeyGeneratorException("Unable to parse input partition field :" + fieldVal, e);
+    }
+  }
+}
--- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionHelper.scala
+++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionHelper.scala
@@ -0,0 +1,368 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi
+
+import java.nio.ByteBuffer
+import java.sql.{Date, Timestamp}
+import java.util
+
+import org.apache.avro.Conversions.DecimalConversion
+import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis}
+import org.apache.avro.Schema.Type._
+import org.apache.avro.generic.GenericData.{Fixed, Record}
+import org.apache.avro.generic.{GenericData, GenericFixed, GenericRecord}
+import org.apache.avro.{LogicalTypes, Schema}
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.avro.{IncompatibleSchemaException, SchemaConverters}
+import org.apache.spark.sql.catalyst.expressions.GenericRow
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
+import org.apache.spark.sql.types._
+
+import scala.collection.JavaConverters._
+
+object AvroConversionHelper {
+
+  private def createDecimal(decimal: java.math.BigDecimal, precision: Int, scale: Int): Decimal = {
+    if (precision <= Decimal.MAX_LONG_DIGITS) {
+      // Constructs a `Decimal` with an unscaled `Long` value if possible.
+      Decimal(decimal.unscaledValue().longValue(), precision, scale)
+    } else {
+      // Otherwise, resorts to an unscaled `BigInteger` instead.
+      Decimal(decimal, precision, scale)
+    }
+  }
+
+  /**
+    *
+    * Returns a converter function to convert row in avro format to GenericRow of catalyst.
+    *
+    * @param sourceAvroSchema Source schema before conversion inferred from avro file by passed in
+    *                         by user.
+    * @param targetSqlType    Target catalyst sql type after the conversion.
+    * @return returns a converter function to convert row in avro format to GenericRow of catalyst.
+    */
+  def createConverterToRow(sourceAvroSchema: Schema,
+                           targetSqlType: DataType): AnyRef => AnyRef = {
+
+    def createConverter(avroSchema: Schema, sqlType: DataType, path: List[String]): AnyRef => AnyRef = {
+      val avroType = avroSchema.getType
+      (sqlType, avroType) match {
+        // Avro strings are in Utf8, so we have to call toString on them
+        case (StringType, STRING) | (StringType, ENUM) =>
+          (item: AnyRef) => if (item == null) null else item.toString
+        // Byte arrays are reused by avro, so we have to make a copy of them.
+        case (IntegerType, INT) | (BooleanType, BOOLEAN) | (DoubleType, DOUBLE) |
+             (FloatType, FLOAT) | (LongType, LONG) =>
+          identity
+        case (BinaryType, FIXED) =>
+          (item: AnyRef) =>
+            if (item == null) {
+              null
+            } else {
+              item.asInstanceOf[Fixed].bytes().clone()
+            }
+        case (BinaryType, BYTES) =>
+          (item: AnyRef) =>
+            if (item == null) {
+              null
+            } else {
+              val byteBuffer = item.asInstanceOf[ByteBuffer]
+              val bytes = new Array[Byte](byteBuffer.remaining)
+              byteBuffer.get(bytes)
+              bytes
+            }
+        case (d: DecimalType, FIXED) =>
+          (item: AnyRef) =>
+            if (item == null) {
+              null
+            } else {
+              val decimalConversion = new DecimalConversion
+              val bigDecimal = decimalConversion.fromFixed(item.asInstanceOf[GenericFixed], avroSchema,
+                LogicalTypes.decimal(d.precision, d.scale))
+              createDecimal(bigDecimal, d.precision, d.scale)
+            }
+        case (d: DecimalType, BYTES) =>
+          (item: AnyRef) =>
+            if (item == null) {
+              null
+            } else {
+              val decimalConversion = new DecimalConversion
+              val bigDecimal = decimalConversion.fromBytes(item.asInstanceOf[ByteBuffer], avroSchema,
+                LogicalTypes.decimal(d.precision, d.scale))
+              createDecimal(bigDecimal, d.precision, d.scale)
+            }
+        case (DateType, INT) =>
+          (item: AnyRef) =>
+            if (item == null) {
+              null
+            } else {
+              item match {
+                case integer: Integer => DateTimeUtils.toJavaDate(integer)
+                case _ => new Date(item.asInstanceOf[Long])
+              }
+            }
+        case (TimestampType, LONG) =>
+          (item: AnyRef) =>
+            if (item == null) {
+              null
+            } else {
+              avroSchema.getLogicalType match {
+                case _: TimestampMillis =>
+                  new Timestamp(item.asInstanceOf[Long])
+                case _: TimestampMicros =>
+                  new Timestamp(item.asInstanceOf[Long] / 1000)
+                case null =>
+                  new Timestamp(item.asInstanceOf[Long])
+                case other =>
+                  throw new IncompatibleSchemaException(
+                    s"Cannot convert Avro logical type $other to Catalyst Timestamp type.")
+              }
+            }
+        case (struct: StructType, RECORD) =>
+          val length = struct.fields.length
+          val converters = new Array[AnyRef => AnyRef](length)
+          val avroFieldIndexes = new Array[Int](length)
+          var i = 0
+          while (i < length) {
+            val sqlField = struct.fields(i)
+            val avroField = avroSchema.getField(sqlField.name)
+            if (avroField != null) {
+              val converter = createConverter(avroField.schema(), sqlField.dataType,
+                path :+ sqlField.name)
+              converters(i) = converter
+              avroFieldIndexes(i) = avroField.pos()
+            } else if (!sqlField.nullable) {
+              throw new IncompatibleSchemaException(
+                s"Cannot find non-nullable field ${sqlField.name} at path ${path.mkString(".")} " +
+                  "in Avro schema\n" +
+                  s"Source Avro schema: $sourceAvroSchema.\n" +
+                  s"Target Catalyst type: $targetSqlType")
+            }
+            i += 1
+          }
+
+          (item: AnyRef) => {
+            if (item == null) {
+              null
+            } else {
+              val record = item.asInstanceOf[GenericRecord]
+
+              val result = new Array[Any](length)
+              var i = 0
+              while (i < converters.length) {
+                if (converters(i) != null) {
+                  val converter = converters(i)
+                  result(i) = converter(record.get(avroFieldIndexes(i)))
+                }
+                i += 1
+              }
+              new GenericRow(result)
+            }
+          }
+        case (arrayType: ArrayType, ARRAY) =>
+          val elementConverter = createConverter(avroSchema.getElementType, arrayType.elementType,
+            path)
+          val allowsNull = arrayType.containsNull
+          (item: AnyRef) => {
+            if (item == null) {
+              null
+            } else {
+              item.asInstanceOf[java.lang.Iterable[AnyRef]].asScala.map { element =>
+                if (element == null && !allowsNull) {
+                  throw new RuntimeException(s"Array value at path ${path.mkString(".")} is not " +
+                    "allowed to be null")
+                } else {
+                  elementConverter(element)
+                }
+              }
+            }
+          }
+        case (mapType: MapType, MAP) if mapType.keyType == StringType =>
+          val valueConverter = createConverter(avroSchema.getValueType, mapType.valueType, path)
+          val allowsNull = mapType.valueContainsNull
+          (item: AnyRef) => {
+            if (item == null) {
+              null
+            } else {
+              item.asInstanceOf[java.util.Map[AnyRef, AnyRef]].asScala.map { x =>
+                if (x._2 == null && !allowsNull) {
+                  throw new RuntimeException(s"Map value at path ${path.mkString(".")} is not " +
+                    "allowed to be null")
+                } else {
+                  (x._1.toString, valueConverter(x._2))
+                }
+              }.toMap
+            }
+          }
+        case (sqlType, UNION) =>
+          if (avroSchema.getTypes.asScala.exists(_.getType == NULL)) {
+            val remainingUnionTypes = avroSchema.getTypes.asScala.filterNot(_.getType == NULL)
+            if (remainingUnionTypes.size == 1) {
+              createConverter(remainingUnionTypes.head, sqlType, path)
+            } else {
+              createConverter(Schema.createUnion(remainingUnionTypes.asJava), sqlType, path)
+            }
+          } else avroSchema.getTypes.asScala.map(_.getType) match {
+            case Seq(_) => createConverter(avroSchema.getTypes.get(0), sqlType, path)
+            case Seq(a, b) if Set(a, b) == Set(INT, LONG) && sqlType == LongType =>
+              (item: AnyRef) => {
+                item match {
+                  case null => null
+                  case l: java.lang.Long => l
+                  case i: java.lang.Integer => new java.lang.Long(i.longValue())
+                }
+              }
+            case Seq(a, b) if Set(a, b) == Set(FLOAT, DOUBLE) && sqlType == DoubleType =>
+              (item: AnyRef) => {
+                item match {
+                  case null => null
+                  case d: java.lang.Double => d
+                  case f: java.lang.Float => new java.lang.Double(f.doubleValue())
+                }
+              }
+            case other =>
+              sqlType match {
+                case t: StructType if t.fields.length == avroSchema.getTypes.size =>
+                  val fieldConverters = t.fields.zip(avroSchema.getTypes.asScala).map {
+                    case (field, schema) =>
+                      createConverter(schema, field.dataType, path :+ field.name)
+                  }
+
+                  (item: AnyRef) =>
+                    if (item == null) {
+                      null
+                    } else {
+                      val i = GenericData.get().resolveUnion(avroSchema, item)
+                      val converted = new Array[Any](fieldConverters.length)
+                      converted(i) = fieldConverters(i)(item)
+                      new GenericRow(converted)
+                    }
+                case _ => throw new IncompatibleSchemaException(
+                  s"Cannot convert Avro schema to catalyst type because schema at path " +
+                    s"${path.mkString(".")} is not compatible " +
+                    s"(avroType = $other, sqlType = $sqlType). \n" +
+                    s"Source Avro schema: $sourceAvroSchema.\n" +
+                    s"Target Catalyst type: $targetSqlType")
+              }
+          }
+        case (left, right) =>
+          throw new IncompatibleSchemaException(
+            s"Cannot convert Avro schema to catalyst type because schema at path " +
+              s"${path.mkString(".")} is not compatible (avroType = $left, sqlType = $right). \n" +
+              s"Source Avro schema: $sourceAvroSchema.\n" +
+              s"Target Catalyst type: $targetSqlType")
+      }
+    }
+
+    createConverter(sourceAvroSchema, targetSqlType, List.empty[String])
+  }
+
+  def createConverterToAvro(dataType: DataType,
+                            structName: String,
+                            recordNamespace: String): Any => Any = {
+    dataType match {
+      case BinaryType => (item: Any) =>
+        item match {
+          case null => null
+          case bytes: Array[Byte] => ByteBuffer.wrap(bytes)
+        }
+      case IntegerType | LongType |
+           FloatType | DoubleType | StringType | BooleanType => identity
+      case ByteType => (item: Any) =>
+        if (item == null) null else item.asInstanceOf[Byte].intValue
+      case ShortType => (item: Any) =>
+        if (item == null) null else item.asInstanceOf[Short].intValue
+      case dec: DecimalType =>
+        val schema = SchemaConverters.toAvroType(dec, nullable = false, structName, recordNamespace)
+        (item: Any) => {
+          Option(item).map { _ =>
+            val bigDecimalValue = item.asInstanceOf[java.math.BigDecimal]
+            val decimalConversions = new DecimalConversion()
+            decimalConversions.toFixed(bigDecimalValue, schema, LogicalTypes.decimal(dec.precision, dec.scale))
+          }.orNull
+        }
+      case TimestampType => (item: Any) =>
+        // Convert time to microseconds since spark-avro by default converts TimestampType to
+        // Avro Logical TimestampMicros
+        Option(item).map(_.asInstanceOf[Timestamp].getTime * 1000).orNull
+      case DateType => (item: Any) =>
+        Option(item).map(_.asInstanceOf[Date].toLocalDate.toEpochDay.toInt).orNull
+      case ArrayType(elementType, _) =>
+        val elementConverter = createConverterToAvro(
+          elementType,
+          structName,
+          recordNamespace)
+        (item: Any) => {
+          if (item == null) {
+            null
+          } else {
+            val sourceArray = item.asInstanceOf[Seq[Any]]
+            val sourceArraySize = sourceArray.size
+            val targetList = new util.ArrayList[Any](sourceArraySize)
+            var idx = 0
+            while (idx < sourceArraySize) {
+              targetList.add(elementConverter(sourceArray(idx)))
+              idx += 1
+            }
+            targetList
+          }
+        }
+      case MapType(StringType, valueType, _) =>
+        val valueConverter = createConverterToAvro(
+          valueType,
+          structName,
+          recordNamespace)
+        (item: Any) => {
+          if (item == null) {
+            null
+          } else {
+            val javaMap = new util.HashMap[String, Any]()
+            item.asInstanceOf[Map[String, Any]].foreach { case (key, value) =>
+              javaMap.put(key, valueConverter(value))
+            }
+            javaMap
+          }
+        }
+      case structType: StructType =>
+        val schema: Schema = SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace)
+        val childNameSpace = if (recordNamespace != "") s"$recordNamespace.$structName" else structName
+        val fieldConverters = structType.fields.map(field =>
+          createConverterToAvro(
+            field.dataType,
+            field.name,
+            childNameSpace))
+        (item: Any) => {
+          if (item == null) {
+            null
+          } else {
+            val record = new Record(schema)
+            val convertersIterator = fieldConverters.iterator
+            val fieldNamesIterator = dataType.asInstanceOf[StructType].fieldNames.iterator
+            val rowIterator = item.asInstanceOf[Row].toSeq.iterator
+
+            while (convertersIterator.hasNext) {
+              val converter = convertersIterator.next()
+              record.put(fieldNamesIterator.next(), converter(rowIterator.next()))
+            }
+            record
+          }
+        }
+    }
+  }
+}
--- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala
+++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi
+
+import org.apache.avro.Schema
+import org.apache.avro.generic.{GenericRecord, GenericRecordBuilder, IndexedRecord}
+import org.apache.hudi.avro.HoodieAvroUtils
+import org.apache.hudi.common.model.HoodieKey
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.avro.SchemaConverters
+import org.apache.spark.sql.catalyst.encoders.RowEncoder
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
+
+import scala.collection.JavaConverters._
+
+object AvroConversionUtils {
+
+  def createRdd(df: DataFrame, structName: String, recordNamespace: String): RDD[GenericRecord] = {
+    val avroSchema = convertStructTypeToAvroSchema(df.schema, structName, recordNamespace)
+    createRdd(df, avroSchema, structName, recordNamespace)
+  }
+
+  def createRdd(df: DataFrame, avroSchema: Schema, structName: String, recordNamespace: String)
+  : RDD[GenericRecord] = {
+    // Use the Avro schema to derive the StructType which has the correct nullability information
+    val dataType = SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType]
+    val encoder = RowEncoder.apply(dataType).resolveAndBind()
+    df.queryExecution.toRdd.map(encoder.fromRow)
+      .mapPartitions { records =>
+        if (records.isEmpty) Iterator.empty
+        else {
+          val convertor = AvroConversionHelper.createConverterToAvro(dataType, structName, recordNamespace)
+          records.map { x => convertor(x).asInstanceOf[GenericRecord] }
+        }
+      }
+  }
+
+  def createRddForDeletes(df: DataFrame, rowField: String, partitionField: String): RDD[HoodieKey] = {
+    df.rdd.map(row => new HoodieKey(row.getAs[String](rowField), row.getAs[String](partitionField)))
+  }
+
+  def createDataFrame(rdd: RDD[GenericRecord], schemaStr: String, ss: SparkSession): Dataset[Row] = {
+    if (rdd.isEmpty()) {
+      ss.emptyDataFrame
+    } else {
+      ss.createDataFrame(rdd.mapPartitions { records =>
+        if (records.isEmpty) Iterator.empty
+        else {
+          val schema = new Schema.Parser().parse(schemaStr)
+          val dataType = convertAvroSchemaToStructType(schema)
+          val convertor = AvroConversionHelper.createConverterToRow(schema, dataType)
+          records.map { x => convertor(x).asInstanceOf[Row] }
+        }
+      }, convertAvroSchemaToStructType(new Schema.Parser().parse(schemaStr)))
+    }
+  }
+
+  def convertStructTypeToAvroSchema(structType: StructType,
+                                    structName: String,
+                                    recordNamespace: String): Schema = {
+    SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace)
+  }
+
+  def convertAvroSchemaToStructType(avroSchema: Schema): StructType = {
+    SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType]
+  }
+
+  def buildAvroRecordBySchema(record: IndexedRecord,
+                              requiredSchema: Schema,
+                              requiredPos: List[Int],
+                              recordBuilder: GenericRecordBuilder): GenericRecord = {
+    val requiredFields = requiredSchema.getFields.asScala
+    assert(requiredFields.length == requiredPos.length)
+    val positionIterator = requiredPos.iterator
+    requiredFields.foreach(f => recordBuilder.set(f, record.get(positionIterator.next())))
+    recordBuilder.build()
+  }
+
+  def getAvroRecordNameAndNamespace(tableName: String): (String, String) = {
+    val name = HoodieAvroUtils.sanitizeName(tableName)
+    (s"${name}_record", s"hoodie.${name}")
+  }
+}