[HUDI-4276] Reconcile schema-inject null values for missing fields and add new fields (#6017)
* [HUDI-4276] Reconcile schema-inject null values for missing fields and add new fields.
* fix comments

Co-authored-by: public (bdcee5037027) <mengtao0326@qq.com>
This commit is contained in:
@@ -39,6 +39,7 @@ import org.apache.hudi.client.heartbeat.HeartbeatUtils;
|
||||
import org.apache.hudi.client.transaction.TransactionManager;
|
||||
import org.apache.hudi.client.utils.TransactionUtils;
|
||||
import org.apache.hudi.common.HoodiePendingRollbackInfo;
|
||||
import org.apache.hudi.common.config.HoodieCommonConfig;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
|
||||
@@ -276,15 +277,21 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
|
||||
TableSchemaResolver schemaUtil = new TableSchemaResolver(table.getMetaClient());
|
||||
String historySchemaStr = schemaUtil.getTableHistorySchemaStrFromCommitMetadata().orElse("");
|
||||
FileBasedInternalSchemaStorageManager schemasManager = new FileBasedInternalSchemaStorageManager(table.getMetaClient());
|
||||
if (!historySchemaStr.isEmpty()) {
|
||||
InternalSchema internalSchema = InternalSchemaUtils.searchSchema(Long.parseLong(instantTime),
|
||||
SerDeHelper.parseSchemas(historySchemaStr));
|
||||
if (!historySchemaStr.isEmpty() || Boolean.parseBoolean(config.getString(HoodieCommonConfig.RECONCILE_SCHEMA.key()))) {
|
||||
InternalSchema internalSchema;
|
||||
Schema avroSchema = HoodieAvroUtils.createHoodieWriteSchema(new Schema.Parser().parse(config.getSchema()));
|
||||
InternalSchema evolvedSchema = AvroSchemaEvolutionUtils.evolveSchemaFromNewAvroSchema(avroSchema, internalSchema);
|
||||
if (historySchemaStr.isEmpty()) {
|
||||
internalSchema = AvroInternalSchemaConverter.convert(avroSchema);
|
||||
internalSchema.setSchemaId(Long.parseLong(instantTime));
|
||||
} else {
|
||||
internalSchema = InternalSchemaUtils.searchSchema(Long.parseLong(instantTime),
|
||||
SerDeHelper.parseSchemas(historySchemaStr));
|
||||
}
|
||||
InternalSchema evolvedSchema = AvroSchemaEvolutionUtils.reconcileSchema(avroSchema, internalSchema);
|
||||
if (evolvedSchema.equals(internalSchema)) {
|
||||
metadata.addMetadata(SerDeHelper.LATEST_SCHEMA, SerDeHelper.toJson(evolvedSchema));
|
||||
//TODO save history schema by metaTable
|
||||
schemasManager.persistHistorySchemaStr(instantTime, historySchemaStr);
|
||||
schemasManager.persistHistorySchemaStr(instantTime, historySchemaStr.isEmpty() ? SerDeHelper.inheritSchemas(evolvedSchema, "") : historySchemaStr);
|
||||
} else {
|
||||
evolvedSchema.setSchemaId(Long.parseLong(instantTime));
|
||||
String newSchemaStr = SerDeHelper.toJson(evolvedSchema);
|
||||
|
||||
@@ -100,7 +100,7 @@ public class HoodieMergeHelper<T extends HoodieRecordPayload> extends
|
||||
// TODO support bootstrap
|
||||
if (querySchemaOpt.isPresent() && !baseFile.getBootstrapBaseFile().isPresent()) {
|
||||
// check implicitly add columns, and position reorder(spark sql may change cols order)
|
||||
InternalSchema querySchema = AvroSchemaEvolutionUtils.evolveSchemaFromNewAvroSchema(readSchema, querySchemaOpt.get(), true);
|
||||
InternalSchema querySchema = AvroSchemaEvolutionUtils.reconcileSchema(readSchema, querySchemaOpt.get());
|
||||
long commitInstantTime = Long.valueOf(FSUtils.getCommitTime(mergeHandle.getOldFilePath().getName()));
|
||||
InternalSchema writeInternalSchema = InternalSchemaCache.searchSchemaAndCache(commitInstantTime, table.getMetaClient(), table.getConfig().getInternalSchemaCacheEnable());
|
||||
if (writeInternalSchema.isEmptySchema()) {
|
||||
|
||||
Reference in New Issue
Block a user