[HUDI-2330][HUDI-2335] Adding support for merge-on-read tables (#3679)
- Inserts go into logs, hashed by Kafka and Hudi partitions
- Fixed issues with the setupKafka script
- Bumped up the default commit interval to 300 seconds
- Minor renaming
This commit is contained in:
@@ -18,14 +18,40 @@
|
||||
|
||||
package org.apache.hudi.table;
|
||||
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.client.common.HoodieJavaEngineContext;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.action.HoodieWriteMetadata;
|
||||
import org.apache.hudi.table.action.commit.JavaBulkInsertPreppedCommitActionExecutor;
|
||||
import org.apache.hudi.table.action.deltacommit.JavaUpsertPreppedDeltaCommitActionExecutor;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class HoodieJavaMergeOnReadTable<T extends HoodieRecordPayload> extends HoodieJavaCopyOnWriteTable<T> {
|
||||
protected HoodieJavaMergeOnReadTable(HoodieWriteConfig config, HoodieEngineContext context, HoodieTableMetaClient metaClient) {
|
||||
super(config, context, metaClient);
|
||||
}
|
||||
// TODO not support yet.
|
||||
|
||||
@Override
|
||||
public HoodieWriteMetadata<List<WriteStatus>> upsertPrepped(HoodieEngineContext context,
|
||||
String instantTime,
|
||||
List<HoodieRecord<T>> preppedRecords) {
|
||||
return new JavaUpsertPreppedDeltaCommitActionExecutor<>((HoodieJavaEngineContext) context, config,
|
||||
this, instantTime, preppedRecords).execute();
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieWriteMetadata<List<WriteStatus>> bulkInsertPrepped(HoodieEngineContext context,
|
||||
String instantTime,
|
||||
List<HoodieRecord<T>> preppedRecords,
|
||||
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
|
||||
return new JavaBulkInsertPreppedCommitActionExecutor((HoodieJavaEngineContext) context, config,
|
||||
this, instantTime, preppedRecords, bulkInsertPartitioner).execute();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,9 +29,8 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieNotSupportedException;
|
||||
import org.apache.hudi.index.JavaHoodieIndex;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.index.JavaHoodieIndex;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -56,7 +55,7 @@ public abstract class HoodieJavaTable<T extends HoodieRecordPayload>
|
||||
case COPY_ON_WRITE:
|
||||
return new HoodieJavaCopyOnWriteTable<>(config, context, metaClient);
|
||||
case MERGE_ON_READ:
|
||||
throw new HoodieNotSupportedException("MERGE_ON_READ is not supported yet");
|
||||
return new HoodieJavaMergeOnReadTable<>(config, context, metaClient);
|
||||
default:
|
||||
throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
|
||||
}
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.action.deltacommit;
|
||||
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.model.WriteOperationType;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.commit.BaseJavaCommitActionExecutor;
|
||||
|
||||
public abstract class BaseJavaDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaCommitActionExecutor<T> {
|
||||
|
||||
public BaseJavaDeltaCommitActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable table,
|
||||
String instantTime, WriteOperationType operationType) {
|
||||
super(context, config, table, instantTime, operationType);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.action.deltacommit;
|
||||
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.client.common.HoodieJavaEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.model.WriteOperationType;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieUpsertException;
|
||||
import org.apache.hudi.io.HoodieAppendHandle;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.HoodieWriteMetadata;
|
||||
import org.apache.hudi.table.action.commit.JavaBulkInsertHelper;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
public class JavaUpsertPreppedDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaDeltaCommitActionExecutor<T> {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(JavaUpsertPreppedDeltaCommitActionExecutor.class);
|
||||
|
||||
private final List<HoodieRecord<T>> preppedInputRecords;
|
||||
|
||||
public JavaUpsertPreppedDeltaCommitActionExecutor(HoodieJavaEngineContext context, HoodieWriteConfig config, HoodieTable table,
|
||||
String instantTime, List<HoodieRecord<T>> preppedInputRecords) {
|
||||
super(context, config, table, instantTime, WriteOperationType.UPSERT_PREPPED);
|
||||
this.preppedInputRecords = preppedInputRecords;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieWriteMetadata<List<WriteStatus>> execute() {
|
||||
HoodieWriteMetadata<List<WriteStatus>> result = new HoodieWriteMetadata<>();
|
||||
// First group by target file id.
|
||||
HashMap<Pair<String, String>, List<HoodieRecord<T>>> recordsByFileId = new HashMap<>();
|
||||
List<HoodieRecord<T>> insertedRecords = new LinkedList<>();
|
||||
|
||||
// Split records into inserts and updates.
|
||||
for (HoodieRecord<T> record : preppedInputRecords) {
|
||||
if (!record.isCurrentLocationKnown()) {
|
||||
insertedRecords.add(record);
|
||||
} else {
|
||||
Pair<String, String> fileIdPartitionPath = Pair.of(record.getCurrentLocation().getFileId(), record.getPartitionPath());
|
||||
if (!recordsByFileId.containsKey(fileIdPartitionPath)) {
|
||||
recordsByFileId.put(fileIdPartitionPath, new LinkedList<>());
|
||||
}
|
||||
recordsByFileId.get(fileIdPartitionPath).add(record);
|
||||
}
|
||||
}
|
||||
LOG.info(String.format("Total update fileIDs %s, total inserts %s for commit %s",
|
||||
recordsByFileId.size(), insertedRecords.size(), instantTime));
|
||||
|
||||
List<WriteStatus> allWriteStatuses = new ArrayList<>();
|
||||
try {
|
||||
recordsByFileId.forEach((k, v) -> {
|
||||
HoodieAppendHandle<?, ?, ?, ?> appendHandle = new HoodieAppendHandle(config, instantTime, table,
|
||||
k.getRight(), k.getLeft(), v.iterator(), taskContextSupplier);
|
||||
appendHandle.doAppend();
|
||||
allWriteStatuses.addAll(appendHandle.close());
|
||||
});
|
||||
|
||||
if (insertedRecords.size() > 0) {
|
||||
HoodieWriteMetadata<List<WriteStatus>> insertResult = JavaBulkInsertHelper.newInstance()
|
||||
.bulkInsert(insertedRecords, instantTime, table, config, this, false, Option.empty());
|
||||
allWriteStatuses.addAll(insertResult.getWriteStatuses());
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
if (e instanceof HoodieUpsertException) {
|
||||
throw e;
|
||||
}
|
||||
throw new HoodieUpsertException("Failed to upsert for commit time " + instantTime, e);
|
||||
}
|
||||
|
||||
updateIndex(allWriteStatuses, result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user