1
0

[HUDI-2767] Enabling timeline-server-based marker as default (#4112)

- Changes the default config of marker type (HoodieWriteConfig.MARKERS_TYPE or hoodie.write.markers.type) from DIRECT to TIMELINE_SERVER_BASED for Spark Engine.
- Adds engine-specific marker type configs: Spark -> TIMELINE_SERVER_BASED, Flink -> DIRECT, Java -> DIRECT.
- Uses DIRECT markers for Spark structured streaming as well, because the timeline server is only available for the first mini-batch.
- Fixes the marker creation method for non-partitioned table in TimelineServerBasedWriteMarkers.
- Adds a fallback to direct markers in WriteMarkersFactory even when TIMELINE_SERVER_BASED is configured: the factory falls back to direct markers when HDFS is used or when the embedded timeline server is disabled.
- Fixes the closing of timeline service.
- Fixes tests that depend on markers, mainly by starting the timeline service for each test.
This commit is contained in:
Y Ethan Guo
2021-11-26 13:41:05 -08:00
committed by GitHub
parent f8e0176eb0
commit d1e83e4ba0
35 changed files with 529 additions and 134 deletions

View File

@@ -19,8 +19,10 @@
package org.apache.hudi.config;
import org.apache.hudi.common.engine.EngineType;
import org.apache.hudi.common.table.marker.MarkerType;
import org.apache.hudi.config.HoodieWriteConfig.Builder;
import org.apache.hudi.index.HoodieIndex;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
@@ -32,6 +34,7 @@ import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.function.Function;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -67,63 +70,38 @@ public class TestHoodieWriteConfig {
@Test
public void testDefaultIndexAccordingToEngineType() {
  // Expected defaults: Spark -> BLOOM, Flink/Java -> INMEMORY.
  // testEngineSpecificConfig also asserts that a builder with no explicit
  // engine type behaves like Spark, so no separate default-engine
  // assertions are needed here (the pre-refactor inline assertions were
  // redundant leftovers of the diff and are removed).
  testEngineSpecificConfig(HoodieWriteConfig::getIndexType,
      constructConfigMap(
          EngineType.SPARK, HoodieIndex.IndexType.BLOOM,
          EngineType.FLINK, HoodieIndex.IndexType.INMEMORY,
          EngineType.JAVA, HoodieIndex.IndexType.INMEMORY));
}
@Test
public void testDefaultClusteringPlanStrategyClassAccordingToEngineType() {
  // Expected defaults: Spark -> Spark size-based plan strategy,
  // Flink/Java -> Java size-based plan strategy.
  // The helper also covers the engine-less default builder (treated as
  // Spark), so the old inline assertions duplicated here by the flattened
  // diff are removed.
  testEngineSpecificConfig(HoodieWriteConfig::getClusteringPlanStrategyClass,
      constructConfigMap(
          EngineType.SPARK, HoodieClusteringConfig.SPARK_SIZED_BASED_CLUSTERING_PLAN_STRATEGY,
          EngineType.FLINK, HoodieClusteringConfig.JAVA_SIZED_BASED_CLUSTERING_PLAN_STRATEGY,
          EngineType.JAVA, HoodieClusteringConfig.JAVA_SIZED_BASED_CLUSTERING_PLAN_STRATEGY));
}
@Test
public void testDefaultClusteringExecutionStrategyClassAccordingToEngineType() {
// Default (as Spark)
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").build();
assertEquals(
HoodieClusteringConfig.SPARK_SORT_AND_SIZE_EXECUTION_STRATEGY,
writeConfig.getClusteringExecutionStrategyClass());
testEngineSpecificConfig(HoodieWriteConfig::getClusteringExecutionStrategyClass,
constructConfigMap(
EngineType.SPARK, HoodieClusteringConfig.SPARK_SORT_AND_SIZE_EXECUTION_STRATEGY,
EngineType.FLINK, HoodieClusteringConfig.JAVA_SORT_AND_SIZE_EXECUTION_STRATEGY,
EngineType.JAVA, HoodieClusteringConfig.JAVA_SORT_AND_SIZE_EXECUTION_STRATEGY));
}
// Spark
writeConfig = HoodieWriteConfig.newBuilder().withEngineType(EngineType.SPARK).withPath("/tmp").build();
assertEquals(
HoodieClusteringConfig.SPARK_SORT_AND_SIZE_EXECUTION_STRATEGY,
writeConfig.getClusteringExecutionStrategyClass());
// Flink and Java
for (EngineType engineType : new EngineType[] {EngineType.FLINK, EngineType.JAVA}) {
writeConfig = HoodieWriteConfig.newBuilder().withEngineType(engineType).withPath("/tmp").build();
assertEquals(
HoodieClusteringConfig.JAVA_SORT_AND_SIZE_EXECUTION_STRATEGY,
writeConfig.getClusteringExecutionStrategyClass());
}
@Test
public void testDefaultMarkersTypeAccordingToEngineType() {
  // Spark defaults to timeline-server-based markers; the Flink and Java
  // engines default to direct markers.
  Map<EngineType, Object> expectedMarkerTypes = constructConfigMap(
      EngineType.SPARK, MarkerType.TIMELINE_SERVER_BASED,
      EngineType.FLINK, MarkerType.DIRECT,
      EngineType.JAVA, MarkerType.DIRECT);
  testEngineSpecificConfig(HoodieWriteConfig::getMarkersType, expectedMarkerTypes);
}
private ByteArrayOutputStream saveParamsIntoOutputStream(Map<String, String> params) throws IOException {
@@ -133,4 +111,44 @@ public class TestHoodieWriteConfig {
properties.store(outStream, "Saved on " + new Date(System.currentTimeMillis()));
return outStream;
}
/**
 * Verifies the engine-specific default values for a single configuration key.
 *
 * @param getConfigFunc     extracts the config value under test from a built
 *                          {@link HoodieWriteConfig}.
 * @param expectedConfigMap expected config value per engine type; the
 *                          engine-less default builder must match the SPARK entry.
 */
private void testEngineSpecificConfig(Function<HoodieWriteConfig, Object> getConfigFunc,
                                      Map<EngineType, Object> expectedConfigMap) {
  // A builder with no explicit engine type must behave like Spark.
  HoodieWriteConfig defaultConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").build();
  assertEquals(expectedConfigMap.get(EngineType.SPARK), getConfigFunc.apply(defaultConfig));
  // Every engine type must yield its own expected value.
  for (Map.Entry<EngineType, Object> expected : expectedConfigMap.entrySet()) {
    HoodieWriteConfig engineConfig = HoodieWriteConfig.newBuilder()
        .withEngineType(expected.getKey()).withPath("/tmp").build();
    assertEquals(expected.getValue(), getConfigFunc.apply(engineConfig));
  }
}
/**
 * Builds a mutable map from three engine-type/config-value pairs.
 *
 * @param k1 first engine type.
 * @param v1 config value for the first engine type.
 * @param k2 second engine type.
 * @param v2 config value for the second engine type.
 * @param k3 third engine type.
 * @param v3 config value for the third engine type.
 * @return {@link Map} keyed by engine type, mapping each engine to its
 *         corresponding config value.
 */
private Map<EngineType, Object> constructConfigMap(
    EngineType k1, Object v1, EngineType k2, Object v2, EngineType k3, Object v3) {
  EngineType[] engines = {k1, k2, k3};
  Object[] values = {v1, v2, v3};
  Map<EngineType, Object> configByEngine = new HashMap<>();
  for (int i = 0; i < engines.length; i++) {
    configByEngine.put(engines[i], values[i]);
  }
  return configByEngine;
}
}

View File

@@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.table.marker;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.marker.MarkerType;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hadoop.conf.Configuration;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.mockito.Mockito;
import java.io.IOException;
import java.util.stream.Stream;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.ArgumentMatchers.any;
/**
 * Unit tests for {@code WriteMarkersFactory}: verifies which concrete marker
 * implementation is produced for each marker type, base path scheme, and
 * timeline-server setting, including the fallback to direct markers.
 */
public class TestWriteMarkersFactory extends HoodieCommonTestHarness {
  private static final String NON_HDFS_BASE_PATH = "/tmp/dir";
  private static final String HDFS_BASE_PATH = "hdfs://localhost/dir";
  // All collaborators are mocked; only the factory's dispatch logic is under test.
  private final HoodieWriteConfig writeConfig = Mockito.mock(HoodieWriteConfig.class);
  private final HoodieTableMetaClient metaClient = Mockito.mock(HoodieTableMetaClient.class);
  private final HoodieWrapperFileSystem fileSystem = Mockito.mock(HoodieWrapperFileSystem.class);
  private final HoodieEngineContext context = Mockito.mock(HoodieEngineContext.class);
  private final HoodieTable table = Mockito.mock(HoodieTable.class);

  @BeforeEach
  public void init() throws IOException {
    initMetaClient();
  }

  /**
   * Parameters for {@link #testDirectMarkers}: every combination of base-path
   * scheme and timeline-server setting. (The original listed the pair
   * {NON_HDFS, true}, {HDFS, false} twice, so two combinations were never
   * exercised; all four distinct combinations are covered now.)
   */
  public static Stream<Arguments> configParams() {
    Object[][] data = new Object[][] {
        {NON_HDFS_BASE_PATH, true}, {HDFS_BASE_PATH, true},
        {NON_HDFS_BASE_PATH, false}, {HDFS_BASE_PATH, false},
    };
    return Stream.of(data).map(Arguments::of);
  }

  @ParameterizedTest
  @MethodSource("configParams")
  public void testDirectMarkers(String basePath, boolean isTimelineServerEnabled) {
    // DIRECT config must always produce direct markers, regardless of
    // file system or timeline-server availability.
    testWriteMarkersFactory(
        MarkerType.DIRECT, basePath, isTimelineServerEnabled, DirectWriteMarkers.class);
  }

  @Test
  public void testTimelineServerBasedMarkersWithTimelineServerEnabled() {
    testWriteMarkersFactory(
        MarkerType.TIMELINE_SERVER_BASED, NON_HDFS_BASE_PATH, true,
        TimelineServerBasedWriteMarkers.class);
  }

  @Test
  public void testTimelineServerBasedMarkersWithTimelineServerDisabled() {
    // Fallback to direct markers should happen
    testWriteMarkersFactory(
        MarkerType.TIMELINE_SERVER_BASED, NON_HDFS_BASE_PATH, false,
        DirectWriteMarkers.class);
  }

  @Test
  public void testTimelineServerBasedMarkersWithHDFS() {
    // Fallback to direct markers should happen
    testWriteMarkersFactory(
        MarkerType.TIMELINE_SERVER_BASED, HDFS_BASE_PATH, true,
        DirectWriteMarkers.class);
  }

  /**
   * Stubs the table/config mocks and asserts the factory returns the expected
   * {@code WriteMarkers} implementation.
   *
   * @param markerTypeConfig        configured marker type.
   * @param basePath                table base path (scheme decides HDFS fallback).
   * @param isTimelineServerEnabled whether the embedded timeline server is on.
   * @param expectedWriteMarkersClass concrete class the factory should produce.
   */
  private void testWriteMarkersFactory(
      MarkerType markerTypeConfig, String basePath, boolean isTimelineServerEnabled,
      Class<?> expectedWriteMarkersClass) {
    String instantTime = "001";
    Mockito.when(table.getConfig()).thenReturn(writeConfig);
    Mockito.when(writeConfig.isEmbeddedTimelineServerEnabled())
        .thenReturn(isTimelineServerEnabled);
    Mockito.when(table.getMetaClient()).thenReturn(metaClient);
    Mockito.when(metaClient.getFs()).thenReturn(fileSystem);
    Mockito.when(metaClient.getBasePath()).thenReturn(basePath);
    // Include the path separator the original omitted ("/dir.hoodie/.temp").
    Mockito.when(metaClient.getMarkerFolderPath(any())).thenReturn(basePath + "/.hoodie/.temp");
    Mockito.when(table.getContext()).thenReturn(context);
    Mockito.when(context.getHadoopConf()).thenReturn(new SerializableConfiguration(new Configuration()));
    Mockito.when(writeConfig.getViewStorageConfig())
        .thenReturn(FileSystemViewStorageConfig.newBuilder().build());
    assertEquals(expectedWriteMarkersClass,
        WriteMarkersFactory.get(markerTypeConfig, table, instantTime).getClass());
  }
}