Improving out of box experience for data source
- Fixes #246 - Bump up default parallelism to 1500, to handle large upserts - Add docs on s3 configuration & tuning tips with tested spark knobs - Fix bug to not duplicate hoodie metadata fields when input dataframe is another hoodie dataset - Improve speed of ROTablePathFilter by removing directory check - Move to spark-avro 4.0 to handle issue with nested fields with same name - Keep AvroConversionUtils in sync with spark-avro 4.0
This commit is contained in:
committed by
vinoth chandar
parent
a97814462d
commit
85dd265b7b
@@ -43,6 +43,16 @@ object AvroConversionUtils {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Chooses the record namespace to hand to the converter of a nested element.
 *
 * When the element is itself a struct, the namespace is extended with the
 * element's name; for every other data type the current namespace is passed
 * through untouched.
 *
 * @param elementDataType        data type of the nested element (array element,
 *                               map value or struct field)
 * @param currentRecordNamespace namespace of the enclosing record
 * @param elementName            name appended to the namespace for struct elements
 * @return s"$currentRecordNamespace.$elementName" for a struct element,
 *         otherwise `currentRecordNamespace`
 */
def getNewRecordNamespace(elementDataType: DataType,
                          currentRecordNamespace: String,
                          elementName: String): String =
  elementDataType match {
    // Only struct-typed elements get a namespace of their own.
    case _: StructType => s"$currentRecordNamespace.$elementName"
    case _ => currentRecordNamespace
  }
|
||||
|
||||
def createConverterToAvro(dataType: DataType,
|
||||
structName: String,
|
||||
recordNamespace: String): (Any) => Any = {
|
||||
@@ -60,7 +70,10 @@ object AvroConversionUtils {
|
||||
case DateType => (item: Any) =>
|
||||
if (item == null) null else item.asInstanceOf[Date].getTime
|
||||
case ArrayType(elementType, _) =>
|
||||
val elementConverter = createConverterToAvro(elementType, structName, recordNamespace)
|
||||
val elementConverter = createConverterToAvro(
|
||||
elementType,
|
||||
structName,
|
||||
getNewRecordNamespace(elementType, recordNamespace, structName))
|
||||
(item: Any) => {
|
||||
if (item == null) {
|
||||
null
|
||||
@@ -77,7 +90,10 @@ object AvroConversionUtils {
|
||||
}
|
||||
}
|
||||
case MapType(StringType, valueType, _) =>
|
||||
val valueConverter = createConverterToAvro(valueType, structName, recordNamespace)
|
||||
val valueConverter = createConverterToAvro(
|
||||
valueType,
|
||||
structName,
|
||||
getNewRecordNamespace(valueType, recordNamespace, structName))
|
||||
(item: Any) => {
|
||||
if (item == null) {
|
||||
null
|
||||
@@ -94,7 +110,10 @@ object AvroConversionUtils {
|
||||
val schema: Schema = SchemaConverters.convertStructToAvro(
|
||||
structType, builder, recordNamespace)
|
||||
val fieldConverters = structType.fields.map(field =>
|
||||
createConverterToAvro(field.dataType, field.name, recordNamespace))
|
||||
createConverterToAvro(
|
||||
field.dataType,
|
||||
field.name,
|
||||
getNewRecordNamespace(field.dataType, recordNamespace, field.name)))
|
||||
(item: Any) => {
|
||||
if (item == null) {
|
||||
null
|
||||
|
||||
Reference in New Issue
Block a user