1
0

[HUDI-2330][HUDI-2335] Adding support for merge-on-read tables (#3679)

- Inserts go into logs, hashed by Kafka and Hudi partitions
 - Fixed issues with the setupKafka script
 - Bumped up the default commit interval to 300 seconds
 - Minor renaming
This commit is contained in:
vinoth chandar
2021-09-16 15:24:34 -07:00
committed by GitHub
parent b8dad628e5
commit 57d5da68aa
16 changed files with 315 additions and 124 deletions

View File

@@ -18,14 +18,40 @@
package org.apache.hudi.table;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieJavaEngineContext;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.commit.JavaBulkInsertPreppedCommitActionExecutor;
import org.apache.hudi.table.action.deltacommit.JavaUpsertPreppedDeltaCommitActionExecutor;
import java.util.List;
public class HoodieJavaMergeOnReadTable<T extends HoodieRecordPayload> extends HoodieJavaCopyOnWriteTable<T> {
protected HoodieJavaMergeOnReadTable(HoodieWriteConfig config, HoodieEngineContext context, HoodieTableMetaClient metaClient) {
super(config, context, metaClient);
}
// TODO not support yet.
@Override
public HoodieWriteMetadata<List<WriteStatus>> upsertPrepped(HoodieEngineContext context,
String instantTime,
List<HoodieRecord<T>> preppedRecords) {
return new JavaUpsertPreppedDeltaCommitActionExecutor<>((HoodieJavaEngineContext) context, config,
this, instantTime, preppedRecords).execute();
}
@Override
public HoodieWriteMetadata<List<WriteStatus>> bulkInsertPrepped(HoodieEngineContext context,
String instantTime,
List<HoodieRecord<T>> preppedRecords,
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
return new JavaBulkInsertPreppedCommitActionExecutor((HoodieJavaEngineContext) context, config,
this, instantTime, preppedRecords, bulkInsertPartitioner).execute();
}
}

View File

@@ -29,9 +29,8 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.index.JavaHoodieIndex;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.index.JavaHoodieIndex;
import java.util.List;
@@ -56,7 +55,7 @@ public abstract class HoodieJavaTable<T extends HoodieRecordPayload>
case COPY_ON_WRITE:
return new HoodieJavaCopyOnWriteTable<>(config, context, metaClient);
case MERGE_ON_READ:
throw new HoodieNotSupportedException("MERGE_ON_READ is not supported yet");
return new HoodieJavaMergeOnReadTable<>(config, context, metaClient);
default:
throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
}

View File

@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.table.action.deltacommit;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.commit.BaseJavaCommitActionExecutor;
public abstract class BaseJavaDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaCommitActionExecutor<T> {
public BaseJavaDeltaCommitActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable table,
String instantTime, WriteOperationType operationType) {
super(context, config, table, instantTime, operationType);
}
}

View File

@@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.table.action.deltacommit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieJavaEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.io.HoodieAppendHandle;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.commit.JavaBulkInsertHelper;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
public class JavaUpsertPreppedDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaDeltaCommitActionExecutor<T> {
private static final Logger LOG = LogManager.getLogger(JavaUpsertPreppedDeltaCommitActionExecutor.class);
private final List<HoodieRecord<T>> preppedInputRecords;
public JavaUpsertPreppedDeltaCommitActionExecutor(HoodieJavaEngineContext context, HoodieWriteConfig config, HoodieTable table,
String instantTime, List<HoodieRecord<T>> preppedInputRecords) {
super(context, config, table, instantTime, WriteOperationType.UPSERT_PREPPED);
this.preppedInputRecords = preppedInputRecords;
}
@Override
public HoodieWriteMetadata<List<WriteStatus>> execute() {
HoodieWriteMetadata<List<WriteStatus>> result = new HoodieWriteMetadata<>();
// First group by target file id.
HashMap<Pair<String, String>, List<HoodieRecord<T>>> recordsByFileId = new HashMap<>();
List<HoodieRecord<T>> insertedRecords = new LinkedList<>();
// Split records into inserts and updates.
for (HoodieRecord<T> record : preppedInputRecords) {
if (!record.isCurrentLocationKnown()) {
insertedRecords.add(record);
} else {
Pair<String, String> fileIdPartitionPath = Pair.of(record.getCurrentLocation().getFileId(), record.getPartitionPath());
if (!recordsByFileId.containsKey(fileIdPartitionPath)) {
recordsByFileId.put(fileIdPartitionPath, new LinkedList<>());
}
recordsByFileId.get(fileIdPartitionPath).add(record);
}
}
LOG.info(String.format("Total update fileIDs %s, total inserts %s for commit %s",
recordsByFileId.size(), insertedRecords.size(), instantTime));
List<WriteStatus> allWriteStatuses = new ArrayList<>();
try {
recordsByFileId.forEach((k, v) -> {
HoodieAppendHandle<?, ?, ?, ?> appendHandle = new HoodieAppendHandle(config, instantTime, table,
k.getRight(), k.getLeft(), v.iterator(), taskContextSupplier);
appendHandle.doAppend();
allWriteStatuses.addAll(appendHandle.close());
});
if (insertedRecords.size() > 0) {
HoodieWriteMetadata<List<WriteStatus>> insertResult = JavaBulkInsertHelper.newInstance()
.bulkInsert(insertedRecords, instantTime, table, config, this, false, Option.empty());
allWriteStatuses.addAll(insertResult.getWriteStatuses());
}
} catch (Throwable e) {
if (e instanceof HoodieUpsertException) {
throw e;
}
throw new HoodieUpsertException("Failed to upsert for commit time " + instantTime, e);
}
updateIndex(allWriteStatuses, result);
return result;
}
}