1
0

[HUDI-1446] Support skip bootstrapIndex's init in abstract fs view init (#2520)

Co-authored-by: zhongliang <zhongliang@kuaishou.com>
Co-authored-by: Sivabalan Narayanan <sivabala@uber.com>
This commit is contained in:
xoln ann
2021-05-14 12:29:26 +08:00
committed by GitHub
parent ad77cf42ba
commit 12443e4187
6 changed files with 93 additions and 8 deletions

View File

@@ -23,6 +23,7 @@ import org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelect
import org.apache.hudi.client.bootstrap.translator.IdentityBootstrapPartitionPathTranslator;
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
import org.apache.hudi.common.config.DefaultHoodieConfig;
import org.apache.hudi.common.table.HoodieTableConfig;
import java.io.File;
import java.io.FileReader;
@@ -135,7 +136,7 @@ public class HoodieBootstrapConfig extends DefaultHoodieConfig {
BOOTSTRAP_MODE_SELECTOR_REGEX_MODE, DEFAULT_BOOTSTRAP_MODE_SELECTOR_REGEX_MODE);
BootstrapMode.valueOf(props.getProperty(BOOTSTRAP_MODE_SELECTOR_REGEX_MODE));
setDefaultOnCondition(props, !props.containsKey(BOOTSTRAP_INDEX_CLASS_PROP), BOOTSTRAP_INDEX_CLASS_PROP,
DEFAULT_BOOTSTRAP_INDEX_CLASS);
HoodieTableConfig.getDefaultBootstrapIndexClass(props));
setDefaultOnCondition(props, !props.containsKey(FULL_BOOTSTRAP_INPUT_PROVIDER), FULL_BOOTSTRAP_INPUT_PROVIDER,
DEFAULT_FULL_BOOTSTRAP_INPUT_PROVIDER);
return config;

View File

@@ -64,10 +64,14 @@ public abstract class BootstrapIndex implements Serializable {
* @return
*/
public final boolean useIndex() {
// NOTE(review): this span contains two consecutive versions of the method body. They look like the
// before/after sides of a diff hunk whose +/- markers are not visible here - confirm against the real file.
// Variant A: computes the timeline check first and evaluates isPresent() last.
// validInstantTime is true only when the last completed commit instant has a timestamp
// >= METADATA_BOOTSTRAP_INSTANT_TS; with no completed instant it is false.
boolean validInstantTime = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant()
.map(i -> HoodieTimeline.compareTimestamps(i.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS,
HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS)).orElse(false);
return validInstantTime && metaClient.getTableConfig().getBootstrapBasePath().isPresent() && isPresent();
// Variant B: checks isPresent() first, so the timeline and table config are only consulted
// when the index reports itself as present; otherwise the method returns false immediately.
if (isPresent()) {
boolean validInstantTime = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant()
.map(i -> HoodieTimeline.compareTimestamps(i.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS,
HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS)).orElse(false);
// The index is usable only if the timeline check passes AND a bootstrap base path is configured.
return validInstantTime && metaClient.getTableConfig().getBootstrapBasePath().isPresent();
} else {
return false;
}
}
/**

View File

@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.bootstrap.index;
import org.apache.hudi.common.table.HoodieTableMetaClient;
/**
 * No-op bootstrap index: an empty implementation that never reports an index as present.
 *
 * <p>{@link #isPresent()} always returns {@code false}. Creating a reader or writer and dropping
 * the index are not supported; each of those operations throws
 * {@link UnsupportedOperationException} (a {@link RuntimeException}).
 */
public class NoOpBootstrapIndex extends BootstrapIndex {

  /**
   * @param metaClient table meta client, passed straight through to {@link BootstrapIndex}
   */
  public NoOpBootstrapIndex(HoodieTableMetaClient metaClient) {
    super(metaClient);
  }

  /**
   * Not supported by the no-op index.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public IndexReader createReader() {
    throw new UnsupportedOperationException("NoOpBootstrapIndex does not support creating a reader!");
  }

  /**
   * Not supported by the no-op index.
   *
   * @param sourceBasePath ignored
   * @throws UnsupportedOperationException always
   */
  @Override
  public IndexWriter createWriter(String sourceBasePath) {
    throw new UnsupportedOperationException("NoOpBootstrapIndex does not support creating a writer!");
  }

  /**
   * Not supported by the no-op index.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void dropIndex() {
    throw new UnsupportedOperationException("NoOpBootstrapIndex does not support dropping the index!");
  }

  /**
   * @return always {@code false}: the no-op index never holds any data
   */
  @Override
  protected boolean isPresent() {
    return false;
  }
}

View File

@@ -20,6 +20,7 @@ package org.apache.hudi.common.table;
import java.util.Arrays;
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
import org.apache.hudi.common.bootstrap.index.NoOpBootstrapIndex;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
@@ -69,6 +70,7 @@ public class HoodieTableConfig implements Serializable {
public static final String HOODIE_TIMELINE_LAYOUT_VERSION = "hoodie.timeline.layout.version";
public static final String HOODIE_PAYLOAD_CLASS_PROP_NAME = "hoodie.compaction.payload.class";
public static final String HOODIE_ARCHIVELOG_FOLDER_PROP_NAME = "hoodie.archivelog.folder";
public static final String HOODIE_BOOTSTRAP_INDEX_ENABLE = "hoodie.bootstrap.index.enable";
public static final String HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME = "hoodie.bootstrap.index.class";
public static final String HOODIE_BOOTSTRAP_BASE_PATH = "hoodie.bootstrap.base.path";
@@ -77,6 +79,7 @@ public class HoodieTableConfig implements Serializable {
public static final HoodieFileFormat DEFAULT_BASE_FILE_FORMAT = HoodieFileFormat.PARQUET;
public static final HoodieFileFormat DEFAULT_LOG_FILE_FORMAT = HoodieFileFormat.HOODIE_LOG;
public static final String DEFAULT_PAYLOAD_CLASS = OverwriteWithLatestAvroPayload.class.getName();
public static final String NO_OP_BOOTSTRAP_INDEX_CLASS = NoOpBootstrapIndex.class.getName();
public static final String DEFAULT_BOOTSTRAP_INDEX_CLASS = HFileBootstrapIndex.class.getName();
public static final String DEFAULT_ARCHIVELOG_FOLDER = "";
@@ -146,7 +149,7 @@ public class HoodieTableConfig implements Serializable {
}
if (properties.containsKey(HOODIE_BOOTSTRAP_BASE_PATH) && !properties.containsKey(HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME)) {
// Use the default bootstrap index class.
properties.setProperty(HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME, DEFAULT_BOOTSTRAP_INDEX_CLASS);
properties.setProperty(HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME, getDefaultBootstrapIndexClass(properties));
}
properties.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
}
@@ -209,7 +212,15 @@ public class HoodieTableConfig implements Serializable {
/**
 * Returns the value stored under {@code HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME}, or a fallback
 * class name when that property is not set.
 */
public String getBootstrapIndexClass() {
// The stored value is returned as-is: no package-name rewriting (e.g. com.uber.hoodie ->
// org.apache.hudi) is performed in this method.
// NOTE(review): the two return statements below look like the before/after sides of a diff hunk
// whose +/- markers are not visible here - confirm against the real file. The first falls back to
// the fixed DEFAULT_BOOTSTRAP_INDEX_CLASS; the second derives the fallback from props.
return props.getProperty(HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME, DEFAULT_BOOTSTRAP_INDEX_CLASS);
return props.getProperty(HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME, getDefaultBootstrapIndexClass(props));
}
/**
 * Picks the bootstrap index class name to fall back on when none is configured explicitly.
 *
 * @param props properties consulted for {@code hoodie.bootstrap.index.enable}
 * @return {@code NO_OP_BOOTSTRAP_INDEX_CLASS} when that property equals "false" (ignoring case);
 *         {@code DEFAULT_BOOTSTRAP_INDEX_CLASS} in every other case, including when it is unset
 */
public static String getDefaultBootstrapIndexClass(Properties props) {
  final boolean indexDisabled = "false".equalsIgnoreCase(props.getProperty(HOODIE_BOOTSTRAP_INDEX_ENABLE));
  return indexDisabled ? NO_OP_BOOTSTRAP_INDEX_CLASS : DEFAULT_BOOTSTRAP_INDEX_CLASS;
}
public Option<String> getBootstrapBasePath() {

View File

@@ -24,6 +24,7 @@ import org.apache.hudi.avro.model.HoodiePath;
import org.apache.hudi.common.bootstrap.index.BootstrapIndex;
import org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter;
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
import org.apache.hudi.common.bootstrap.index.NoOpBootstrapIndex;
import org.apache.hudi.common.model.BootstrapFileMapping;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.table.HoodieTableConfig;
@@ -31,6 +32,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import java.io.IOException;
@@ -41,6 +43,7 @@ import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
@@ -86,6 +89,21 @@ public class TestBootstrapIndex extends HoodieCommonTestHarness {
testBootstrapIndexOneRound(5);
}
/**
 * Verifies that setting {@code hoodie.bootstrap.index.enable} to "false" in the table properties
 * makes {@link BootstrapIndex#getBootstrapIndex} hand back a {@link NoOpBootstrapIndex}.
 */
@Test
public void testNoOpBootstrapIndex() throws IOException {
  // Take the current table config entries and switch the bootstrap index off.
  Map<String, String> props = metaClient.getTableConfig().getProps();
  props.put(HoodieTableConfig.HOODIE_BOOTSTRAP_INDEX_ENABLE, "false");
  Properties properties = new Properties();
  props.forEach(properties::setProperty);

  // Write the modified properties out and rebuild the meta client from the same base path.
  HoodieTableConfig.createHoodieProperties(metaClient.getFs(), new Path(metaClient.getMetaPath()), properties);
  metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build();

  BootstrapIndex bootstrapIndex = BootstrapIndex.getBootstrapIndex(metaClient);
  // Explicit check rather than the `assert` keyword: `assert` is skipped entirely when the JVM
  // runs without -ea, which would let this test pass without verifying anything.
  if (!(bootstrapIndex instanceof NoOpBootstrapIndex)) {
    String actual = bootstrapIndex == null ? "null" : bootstrapIndex.getClass().getName();
    throw new AssertionError("Expected a NoOpBootstrapIndex but got " + actual);
  }
}
@Test
public void testBootstrapIndexConcurrent() throws Exception {
Map<String, List<BootstrapFileMapping>> bootstrapMapping = generateBootstrapIndex(metaClient, BOOTSTRAP_BASE_PATH, PARTITIONS, 100);

View File

@@ -411,7 +411,7 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers {
initSparkContext("test_schema_evolution")
val path = java.nio.file.Files.createTempDirectory("hoodie_test_path")
try {
val hoodieFooTableName = "hoodie_foo_tbl"
val hoodieFooTableName = "hoodie_foo_tbl_" + tableType
//create a new table
val fooTableModifier = Map("path" -> path.toAbsolutePath.toString,
HoodieWriteConfig.TABLE_NAME -> hoodieFooTableName,