From 12443e41873269a2c0ba12447f73c8a88fa7138f Mon Sep 17 00:00:00 2001 From: xoln ann Date: Fri, 14 May 2021 12:29:26 +0800 Subject: [PATCH] [HUDI-1446] Support skip bootstrapIndex's init in abstract fs view init (#2520) Co-authored-by: zhongliang Co-authored-by: Sivabalan Narayanan --- .../hudi/config/HoodieBootstrapConfig.java | 3 +- .../bootstrap/index/BootstrapIndex.java | 12 +++-- .../bootstrap/index/NoOpBootstrapIndex.java | 51 +++++++++++++++++++ .../hudi/common/table/HoodieTableConfig.java | 15 +++++- .../common/bootstrap/TestBootstrapIndex.java | 18 +++++++ .../HoodieSparkSqlWriterSuite.scala | 2 +- 6 files changed, 93 insertions(+), 8 deletions(-) create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/NoOpBootstrapIndex.java diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java index bde2e03da..fb55442ab 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java @@ -23,6 +23,7 @@ import org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelect import org.apache.hudi.client.bootstrap.translator.IdentityBootstrapPartitionPathTranslator; import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; import org.apache.hudi.common.config.DefaultHoodieConfig; +import org.apache.hudi.common.table.HoodieTableConfig; import java.io.File; import java.io.FileReader; @@ -135,7 +136,7 @@ public class HoodieBootstrapConfig extends DefaultHoodieConfig { BOOTSTRAP_MODE_SELECTOR_REGEX_MODE, DEFAULT_BOOTSTRAP_MODE_SELECTOR_REGEX_MODE); BootstrapMode.valueOf(props.getProperty(BOOTSTRAP_MODE_SELECTOR_REGEX_MODE)); setDefaultOnCondition(props, !props.containsKey(BOOTSTRAP_INDEX_CLASS_PROP), BOOTSTRAP_INDEX_CLASS_PROP, - 
DEFAULT_BOOTSTRAP_INDEX_CLASS); + HoodieTableConfig.getDefaultBootstrapIndexClass(props)); setDefaultOnCondition(props, !props.containsKey(FULL_BOOTSTRAP_INPUT_PROVIDER), FULL_BOOTSTRAP_INPUT_PROVIDER, DEFAULT_FULL_BOOTSTRAP_INPUT_PROVIDER); return config; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/BootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/BootstrapIndex.java index 08d7f86ae..6aafeca53 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/BootstrapIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/BootstrapIndex.java @@ -64,10 +64,14 @@ public abstract class BootstrapIndex implements Serializable { * @return */ public final boolean useIndex() { - boolean validInstantTime = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant() - .map(i -> HoodieTimeline.compareTimestamps(i.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS, - HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS)).orElse(false); - return validInstantTime && metaClient.getTableConfig().getBootstrapBasePath().isPresent() && isPresent(); + if (isPresent()) { + boolean validInstantTime = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant() + .map(i -> HoodieTimeline.compareTimestamps(i.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS, + HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS)).orElse(false); + return validInstantTime && metaClient.getTableConfig().getBootstrapBasePath().isPresent(); + } else { + return false; + } } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/NoOpBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/NoOpBootstrapIndex.java new file mode 100644 index 000000000..e4e32fa12 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/NoOpBootstrapIndex.java @@ -0,0 +1,51 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.bootstrap.index; + +import org.apache.hudi.common.table.HoodieTableMetaClient; + +/** + * No-op bootstrap index: an empty implementation that does nothing. 
+ */ +public class NoOpBootstrapIndex extends BootstrapIndex { + + public NoOpBootstrapIndex(HoodieTableMetaClient metaClient) { + super(metaClient); + } + + @Override + public IndexReader createReader() { + throw new UnsupportedOperationException("NoOpBootstrapIndex does not support creating a reader!"); + } + + @Override + public IndexWriter createWriter(String sourceBasePath) { + throw new UnsupportedOperationException("NoOpBootstrapIndex does not support creating a writer!"); + } + + @Override + public void dropIndex() { + throw new UnsupportedOperationException("NoOpBootstrapIndex does not support dropping the index!"); + } + + @Override + protected boolean isPresent() { + return false; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java index 0b36e31ee..6d5b248b1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java @@ -20,6 +20,7 @@ package org.apache.hudi.common.table; import java.util.Arrays; import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; +import org.apache.hudi.common.bootstrap.index.NoOpBootstrapIndex; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; @@ -69,6 +70,7 @@ public class HoodieTableConfig implements Serializable { public static final String HOODIE_TIMELINE_LAYOUT_VERSION = "hoodie.timeline.layout.version"; public static final String HOODIE_PAYLOAD_CLASS_PROP_NAME = "hoodie.compaction.payload.class"; public static final String HOODIE_ARCHIVELOG_FOLDER_PROP_NAME = "hoodie.archivelog.folder"; + public static final String HOODIE_BOOTSTRAP_INDEX_ENABLE = "hoodie.bootstrap.index.enable"; public static final String HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME = "hoodie.bootstrap.index.class"; public static final String 
HOODIE_BOOTSTRAP_BASE_PATH = "hoodie.bootstrap.base.path"; @@ -77,6 +79,7 @@ public class HoodieTableConfig implements Serializable { public static final HoodieFileFormat DEFAULT_BASE_FILE_FORMAT = HoodieFileFormat.PARQUET; public static final HoodieFileFormat DEFAULT_LOG_FILE_FORMAT = HoodieFileFormat.HOODIE_LOG; public static final String DEFAULT_PAYLOAD_CLASS = OverwriteWithLatestAvroPayload.class.getName(); + public static final String NO_OP_BOOTSTRAP_INDEX_CLASS = NoOpBootstrapIndex.class.getName(); public static final String DEFAULT_BOOTSTRAP_INDEX_CLASS = HFileBootstrapIndex.class.getName(); public static final String DEFAULT_ARCHIVELOG_FOLDER = ""; @@ -146,7 +149,7 @@ public class HoodieTableConfig implements Serializable { } if (properties.containsKey(HOODIE_BOOTSTRAP_BASE_PATH) && !properties.containsKey(HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME)) { // Use the default bootstrap index class. - properties.setProperty(HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME, DEFAULT_BOOTSTRAP_INDEX_CLASS); + properties.setProperty(HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME, getDefaultBootstrapIndexClass(properties)); } properties.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis())); } @@ -209,7 +212,15 @@ public class HoodieTableConfig implements Serializable { public String getBootstrapIndexClass() { // There could be tables written with payload class from com.uber.hoodie. 
Need to transparently // change to org.apache.hudi - return props.getProperty(HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME, DEFAULT_BOOTSTRAP_INDEX_CLASS); + return props.getProperty(HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME, getDefaultBootstrapIndexClass(props)); + } + + public static String getDefaultBootstrapIndexClass(Properties props) { + String defaultClass = DEFAULT_BOOTSTRAP_INDEX_CLASS; + if ("false".equalsIgnoreCase(props.getProperty(HOODIE_BOOTSTRAP_INDEX_ENABLE))) { + defaultClass = NO_OP_BOOTSTRAP_INDEX_CLASS; + } + return defaultClass; } public Option getBootstrapBasePath() { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java b/hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java index 593f82bb0..b0adeacaa 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java @@ -24,6 +24,7 @@ import org.apache.hudi.avro.model.HoodiePath; import org.apache.hudi.common.bootstrap.index.BootstrapIndex; import org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter; import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; +import org.apache.hudi.common.bootstrap.index.NoOpBootstrapIndex; import org.apache.hudi.common.model.BootstrapFileMapping; import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.table.HoodieTableConfig; @@ -31,6 +32,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; import java.io.IOException; @@ -41,6 +43,7 @@ import java.util.Collections; import java.util.Date; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.UUID; import 
java.util.concurrent.ExecutorService; @@ -86,6 +89,21 @@ public class TestBootstrapIndex extends HoodieCommonTestHarness { testBootstrapIndexOneRound(5); } + @Test + public void testNoOpBootstrapIndex() throws IOException { + Map props = metaClient.getTableConfig().getProps(); + props.put(HoodieTableConfig.HOODIE_BOOTSTRAP_INDEX_ENABLE, "false"); + Properties properties = new Properties(); + for (Map.Entry prop : props.entrySet()) { + properties.setProperty(prop.getKey(), prop.getValue()); + } + HoodieTableConfig.createHoodieProperties(metaClient.getFs(), new Path(metaClient.getMetaPath()), properties); + + metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build(); + BootstrapIndex bootstrapIndex = BootstrapIndex.getBootstrapIndex(metaClient); + assert (bootstrapIndex instanceof NoOpBootstrapIndex); + } + @Test public void testBootstrapIndexConcurrent() throws Exception { Map> bootstrapMapping = generateBootstrapIndex(metaClient, BOOTSTRAP_BASE_PATH, PARTITIONS, 100); diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala index 606435aca..269c4acdf 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala @@ -411,7 +411,7 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { initSparkContext("test_schema_evolution") val path = java.nio.file.Files.createTempDirectory("hoodie_test_path") try { - val hoodieFooTableName = "hoodie_foo_tbl" + val hoodieFooTableName = "hoodie_foo_tbl_" + tableType //create a new table val fooTableModifier = Map("path" -> path.toAbsolutePath.toString, HoodieWriteConfig.TABLE_NAME -> hoodieFooTableName,