[HUDI-3365] Make sure Metadata Table records are updated appropriately on HDFS (#4739)
- This change makes sure metadata table (MT) records are updated appropriately on HDFS: previously, after log-file append operations, MT records were updated with just the size of the deltas appended to the original files. This was found to be the cause of issues during rollbacks, which instead updated the MT with records bearing the full file size. - To hedge against similar issues going forward, this PR eliminates that discrepancy and streamlines the flow so that the MT always ingests records bearing full file sizes.
This commit is contained in:
@@ -18,6 +18,8 @@
|
||||
|
||||
package org.apache.hudi.client;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hudi.client.common.HoodieJavaEngineContext;
|
||||
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
||||
import org.apache.hudi.common.data.HoodieList;
|
||||
@@ -30,7 +32,6 @@ import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
import org.apache.hudi.common.model.WriteOperationType;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieNotSupportedException;
|
||||
@@ -40,9 +41,7 @@ import org.apache.hudi.table.BulkInsertPartitioner;
|
||||
import org.apache.hudi.table.HoodieJavaTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.HoodieWriteMetadata;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hudi.table.upgrade.JavaUpgradeDowngradeHelper;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -52,14 +51,14 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
BaseHoodieWriteClient<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> {
|
||||
|
||||
public HoodieJavaWriteClient(HoodieEngineContext context, HoodieWriteConfig clientConfig) {
|
||||
super(context, clientConfig);
|
||||
super(context, clientConfig, JavaUpgradeDowngradeHelper.getInstance());
|
||||
}
|
||||
|
||||
public HoodieJavaWriteClient(HoodieEngineContext context,
|
||||
HoodieWriteConfig writeConfig,
|
||||
boolean rollbackPending,
|
||||
Option<EmbeddedTimelineService> timelineService) {
|
||||
super(context, writeConfig, timelineService);
|
||||
super(context, writeConfig, timelineService, JavaUpgradeDowngradeHelper.getInstance());
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -99,7 +98,7 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
public List<WriteStatus> upsert(List<HoodieRecord<T>> records,
|
||||
String instantTime) {
|
||||
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
|
||||
getTableAndInitCtx(WriteOperationType.UPSERT, instantTime);
|
||||
initTable(WriteOperationType.UPSERT, Option.ofNullable(instantTime));
|
||||
table.validateUpsertSchema();
|
||||
preWrite(instantTime, WriteOperationType.UPSERT, table.getMetaClient());
|
||||
HoodieWriteMetadata<List<WriteStatus>> result = table.upsert(context, instantTime, records);
|
||||
@@ -113,7 +112,7 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
public List<WriteStatus> upsertPreppedRecords(List<HoodieRecord<T>> preppedRecords,
|
||||
String instantTime) {
|
||||
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
|
||||
getTableAndInitCtx(WriteOperationType.UPSERT_PREPPED, instantTime);
|
||||
initTable(WriteOperationType.UPSERT_PREPPED, Option.ofNullable(instantTime));
|
||||
table.validateUpsertSchema();
|
||||
preWrite(instantTime, WriteOperationType.UPSERT_PREPPED, table.getMetaClient());
|
||||
HoodieWriteMetadata<List<WriteStatus>> result = table.upsertPrepped(context,instantTime, preppedRecords);
|
||||
@@ -123,7 +122,7 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
@Override
|
||||
public List<WriteStatus> insert(List<HoodieRecord<T>> records, String instantTime) {
|
||||
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
|
||||
getTableAndInitCtx(WriteOperationType.INSERT, instantTime);
|
||||
initTable(WriteOperationType.INSERT, Option.ofNullable(instantTime));
|
||||
table.validateUpsertSchema();
|
||||
preWrite(instantTime, WriteOperationType.INSERT, table.getMetaClient());
|
||||
HoodieWriteMetadata<List<WriteStatus>> result = table.insert(context, instantTime, records);
|
||||
@@ -137,7 +136,7 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
public List<WriteStatus> insertPreppedRecords(List<HoodieRecord<T>> preppedRecords,
|
||||
String instantTime) {
|
||||
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
|
||||
getTableAndInitCtx(WriteOperationType.INSERT_PREPPED, instantTime);
|
||||
initTable(WriteOperationType.INSERT_PREPPED, Option.ofNullable(instantTime));
|
||||
table.validateInsertSchema();
|
||||
preWrite(instantTime, WriteOperationType.INSERT_PREPPED, table.getMetaClient());
|
||||
HoodieWriteMetadata<List<WriteStatus>> result = table.insertPrepped(context,instantTime, preppedRecords);
|
||||
@@ -169,7 +168,7 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
String instantTime,
|
||||
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
|
||||
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
|
||||
getTableAndInitCtx(WriteOperationType.BULK_INSERT_PREPPED, instantTime);
|
||||
initTable(WriteOperationType.BULK_INSERT_PREPPED, Option.ofNullable(instantTime));
|
||||
table.validateInsertSchema();
|
||||
preWrite(instantTime, WriteOperationType.BULK_INSERT_PREPPED, table.getMetaClient());
|
||||
HoodieWriteMetadata<List<WriteStatus>> result = table.bulkInsertPrepped(context, instantTime, preppedRecords, bulkInsertPartitioner);
|
||||
@@ -180,7 +179,7 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
public List<WriteStatus> delete(List<HoodieKey> keys,
|
||||
String instantTime) {
|
||||
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
|
||||
getTableAndInitCtx(WriteOperationType.DELETE, instantTime);
|
||||
initTable(WriteOperationType.DELETE, Option.ofNullable(instantTime));
|
||||
preWrite(instantTime, WriteOperationType.DELETE, table.getMetaClient());
|
||||
HoodieWriteMetadata<List<WriteStatus>> result = table.delete(context,instantTime, keys);
|
||||
return postWrite(result, instantTime, table);
|
||||
@@ -233,23 +232,11 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
}
|
||||
|
||||
@Override
|
||||
protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> getTableAndInitCtx(WriteOperationType operationType, String instantTime) {
|
||||
HoodieTableMetaClient metaClient = createMetaClient(true);
|
||||
protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> doInitTable(HoodieTableMetaClient metaClient, Option<String> instantTime) {
|
||||
// new JavaUpgradeDowngrade(metaClient, config, context).run(metaClient, HoodieTableVersion.current(), config, context, instantTime);
|
||||
return getTableAndInitCtx(metaClient, operationType);
|
||||
|
||||
// Create a Hoodie table which encapsulated the commits and files visible
|
||||
return HoodieJavaTable.create(config, (HoodieJavaEngineContext) context, metaClient);
|
||||
}
|
||||
|
||||
private HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> getTableAndInitCtx(HoodieTableMetaClient metaClient, WriteOperationType operationType) {
|
||||
if (operationType == WriteOperationType.DELETE) {
|
||||
setWriteSchemaForDeletes(metaClient);
|
||||
}
|
||||
// Create a Hoodie table which encapsulated the commits and files visible
|
||||
HoodieJavaTable<T> table = HoodieJavaTable.create(config, (HoodieJavaEngineContext) context, metaClient);
|
||||
if (table.getMetaClient().getCommitActionType().equals(HoodieTimeline.COMMIT_ACTION)) {
|
||||
writeTimer = metrics.getCommitCtx();
|
||||
} else {
|
||||
writeTimer = metrics.getDeltaCommitCtx();
|
||||
}
|
||||
return table;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.hudi.table.HoodieJavaTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
/**
|
||||
* Java upgrade and downgrade helper
|
||||
*/
|
||||
public class JavaUpgradeDowngradeHelper implements SupportsUpgradeDowngrade {
|
||||
|
||||
private static final JavaUpgradeDowngradeHelper SINGLETON_INSTANCE =
|
||||
new JavaUpgradeDowngradeHelper();
|
||||
|
||||
private JavaUpgradeDowngradeHelper() {}
|
||||
|
||||
public static JavaUpgradeDowngradeHelper getInstance() {
|
||||
return SINGLETON_INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
return HoodieJavaTable.create(config, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionColumns(HoodieWriteConfig config) {
|
||||
return config.getProps().getProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user