[HUDI-4276] Reconcile schema-inject null values for missing fields and add new fields (#6017)
* [HUDI-4276] Reconcile schema: inject null values for missing fields and add new fields. * Fix review comments. Co-authored-by: public (bdcee5037027) <mengtao0326@qq.com>
This commit is contained in:
@@ -39,6 +39,7 @@ import org.apache.hudi.client.heartbeat.HeartbeatUtils;
|
||||
import org.apache.hudi.client.transaction.TransactionManager;
|
||||
import org.apache.hudi.client.utils.TransactionUtils;
|
||||
import org.apache.hudi.common.HoodiePendingRollbackInfo;
|
||||
import org.apache.hudi.common.config.HoodieCommonConfig;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
|
||||
@@ -276,15 +277,21 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
|
||||
TableSchemaResolver schemaUtil = new TableSchemaResolver(table.getMetaClient());
|
||||
String historySchemaStr = schemaUtil.getTableHistorySchemaStrFromCommitMetadata().orElse("");
|
||||
FileBasedInternalSchemaStorageManager schemasManager = new FileBasedInternalSchemaStorageManager(table.getMetaClient());
|
||||
if (!historySchemaStr.isEmpty()) {
|
||||
InternalSchema internalSchema = InternalSchemaUtils.searchSchema(Long.parseLong(instantTime),
|
||||
SerDeHelper.parseSchemas(historySchemaStr));
|
||||
if (!historySchemaStr.isEmpty() || Boolean.parseBoolean(config.getString(HoodieCommonConfig.RECONCILE_SCHEMA.key()))) {
|
||||
InternalSchema internalSchema;
|
||||
Schema avroSchema = HoodieAvroUtils.createHoodieWriteSchema(new Schema.Parser().parse(config.getSchema()));
|
||||
InternalSchema evolvedSchema = AvroSchemaEvolutionUtils.evolveSchemaFromNewAvroSchema(avroSchema, internalSchema);
|
||||
if (historySchemaStr.isEmpty()) {
|
||||
internalSchema = AvroInternalSchemaConverter.convert(avroSchema);
|
||||
internalSchema.setSchemaId(Long.parseLong(instantTime));
|
||||
} else {
|
||||
internalSchema = InternalSchemaUtils.searchSchema(Long.parseLong(instantTime),
|
||||
SerDeHelper.parseSchemas(historySchemaStr));
|
||||
}
|
||||
InternalSchema evolvedSchema = AvroSchemaEvolutionUtils.reconcileSchema(avroSchema, internalSchema);
|
||||
if (evolvedSchema.equals(internalSchema)) {
|
||||
metadata.addMetadata(SerDeHelper.LATEST_SCHEMA, SerDeHelper.toJson(evolvedSchema));
|
||||
//TODO save history schema by metaTable
|
||||
schemasManager.persistHistorySchemaStr(instantTime, historySchemaStr);
|
||||
schemasManager.persistHistorySchemaStr(instantTime, historySchemaStr.isEmpty() ? SerDeHelper.inheritSchemas(evolvedSchema, "") : historySchemaStr);
|
||||
} else {
|
||||
evolvedSchema.setSchemaId(Long.parseLong(instantTime));
|
||||
String newSchemaStr = SerDeHelper.toJson(evolvedSchema);
|
||||
|
||||
@@ -100,7 +100,7 @@ public class HoodieMergeHelper<T extends HoodieRecordPayload> extends
|
||||
// TODO support bootstrap
|
||||
if (querySchemaOpt.isPresent() && !baseFile.getBootstrapBaseFile().isPresent()) {
|
||||
// check implicitly add columns, and position reorder(spark sql may change cols order)
|
||||
InternalSchema querySchema = AvroSchemaEvolutionUtils.evolveSchemaFromNewAvroSchema(readSchema, querySchemaOpt.get(), true);
|
||||
InternalSchema querySchema = AvroSchemaEvolutionUtils.reconcileSchema(readSchema, querySchemaOpt.get());
|
||||
long commitInstantTime = Long.valueOf(FSUtils.getCommitTime(mergeHandle.getOldFilePath().getName()));
|
||||
InternalSchema writeInternalSchema = InternalSchemaCache.searchSchemaAndCache(commitInstantTime, table.getMetaClient(), table.getConfig().getInternalSchemaCacheEnable());
|
||||
if (writeInternalSchema.isEmptySchema()) {
|
||||
|
||||
@@ -21,7 +21,6 @@ package org.apache.hudi
|
||||
import org.apache.avro.Schema
|
||||
import org.apache.avro.generic.GenericRecord
|
||||
import org.apache.hadoop.fs.{FileSystem, Path}
|
||||
import org.apache.hudi.avro.HoodieAvroUtils.rewriteRecord
|
||||
import org.apache.hudi.client.utils.SparkRowSerDe
|
||||
import org.apache.hudi.common.config.TypedProperties
|
||||
import org.apache.hudi.common.model.HoodieRecord
|
||||
@@ -39,8 +38,10 @@ import org.apache.spark.sql.catalyst.encoders.RowEncoder
|
||||
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, Literal}
|
||||
import org.apache.spark.sql.sources._
|
||||
import org.apache.spark.sql.types.{StringType, StructField, StructType}
|
||||
|
||||
import java.util.Properties
|
||||
|
||||
import org.apache.hudi.avro.HoodieAvroUtils
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
object HoodieSparkUtils extends SparkAdapterSupport {
|
||||
@@ -162,11 +163,11 @@ object HoodieSparkUtils extends SparkAdapterSupport {
|
||||
if (rows.isEmpty) {
|
||||
Iterator.empty
|
||||
} else {
|
||||
val readerAvroSchema = new Schema.Parser().parse(readerAvroSchemaStr)
|
||||
val transform: GenericRecord => GenericRecord =
|
||||
if (sameSchema) identity
|
||||
else {
|
||||
val readerAvroSchema = new Schema.Parser().parse(readerAvroSchemaStr)
|
||||
rewriteRecord(_, readerAvroSchema)
|
||||
HoodieAvroUtils.rewriteRecordDeep(_, readerAvroSchema)
|
||||
}
|
||||
|
||||
// Since caller might request to get records in a different ("evolved") schema, we will be rewriting from
|
||||
|
||||
Reference in New Issue
Block a user