1
0

[HUDI-2767] Enabling timeline-server-based marker as default (#4112)

- Changes the default config of marker type (HoodieWriteConfig.MARKERS_TYPE or hoodie.write.markers.type) from DIRECT to TIMELINE_SERVER_BASED for Spark Engine.
- Adds engine-specific marker type configs: Spark -> TIMELINE_SERVER_BASED, Flink -> DIRECT, Java -> DIRECT.
- Also uses DIRECT markers for Spark structured streaming, because the timeline server is only available for the first mini-batch.
- Fixes the marker creation method for non-partitioned tables in TimelineServerBasedWriteMarkers.
- Adds a fallback to direct markers in WriteMarkersFactory, even when TIMELINE_SERVER_BASED is configured: the fallback happens when HDFS is used or when the embedded timeline server is disabled.
- Fixes the closing of timeline service.
- Fixes tests that depend on markers, mainly by starting the timeline service for each test.
This commit is contained in:
Y Ethan Guo
2021-11-26 13:41:05 -08:00
committed by GitHub
parent f8e0176eb0
commit d1e83e4ba0
35 changed files with 529 additions and 134 deletions

View File

@@ -29,6 +29,7 @@ import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieSparkTable;
@@ -72,6 +73,8 @@ public class TestArchivedCommitsCommand extends CLIFunctionalTestHarness {
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath)
.withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
.withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
.withRemoteServerPort(timelineServicePort).build())
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build())
.forTable("test-trip-table").build();

View File

@@ -32,6 +32,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.util.NumericUtils;
@@ -209,6 +210,8 @@ public class TestCommitsCommand extends CLIFunctionalTestHarness {
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath1)
.withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
.withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
.withRemoteServerPort(timelineServicePort).build())
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build())
.forTable("test-trip-table").build();

View File

@@ -33,6 +33,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.testutils.CompactionTestUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieCompactionConfig;
@@ -159,6 +160,8 @@ public class TestCompactionCommand extends CLIFunctionalTestHarness {
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath)
.withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
.withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
.withRemoteServerPort(timelineServicePort).build())
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build())
.forTable("test-trip-table").build();
// archive

View File

@@ -28,8 +28,10 @@ import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.HoodieTableVersion;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.testutils.FileCreateUtils;
import org.apache.hudi.common.testutils.HoodieTestTable;
import org.apache.hudi.testutils.HoodieClientTestUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -62,6 +64,8 @@ public class TestUpgradeDowngradeCommand extends CLIFunctionalTestHarness {
new TableCommand().createTable(
tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
"", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
timelineService = HoodieClientTestUtils.initTimelineService(
context, basePath(), FileSystemViewStorageConfig.REMOTE_PORT_NUM.defaultValue());
metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
//Create some commits files and base files
HoodieTestTable.of(metaClient)

View File

@@ -22,7 +22,10 @@ package org.apache.hudi.cli.functional;
import org.apache.hudi.client.HoodieReadClient;
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.testutils.HoodieClientTestUtils;
import org.apache.hudi.testutils.providers.SparkProvider;
import org.apache.hudi.timeline.service.TimelineService;
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
@@ -39,10 +42,13 @@ import java.nio.file.Paths;
public class CLIFunctionalTestHarness implements SparkProvider {
protected static int timelineServicePort =
FileSystemViewStorageConfig.REMOTE_PORT_NUM.defaultValue();
protected static transient TimelineService timelineService;
protected static transient HoodieSparkEngineContext context;
private static transient SparkSession spark;
private static transient SQLContext sqlContext;
private static transient JavaSparkContext jsc;
private static transient HoodieSparkEngineContext context;
private static transient JLineShellComponent shell;
/**
* An indicator of the initialization status.
@@ -107,6 +113,9 @@ public class CLIFunctionalTestHarness implements SparkProvider {
jsc = new JavaSparkContext(spark.sparkContext());
context = new HoodieSparkEngineContext(jsc);
shell = new Bootstrap().getJLineShellComponent();
timelineService = HoodieClientTestUtils.initTimelineService(
context, basePath(), incrementTimelineServicePortToUse());
timelineServicePort = timelineService.getServerPort();
}
}
@@ -120,14 +129,25 @@ public class CLIFunctionalTestHarness implements SparkProvider {
shell.stop();
shell = null;
}
if (timelineService != null) {
timelineService.close();
}
}
/**
 * Normalizes a string so that it can be matched against expected output.
 *
 * <p>Every run of whitespace is first collapsed into a single comma; afterwards every run of
 * non-word characters (regex {@code \W}) is collapsed into a single comma as well.
 *
 * @param str Input string.
 * @return the input with each run of whitespace / non-word characters replaced by one comma.
 */
protected static String removeNonWordAndStripSpace(String str) {
  // Step 1: whitespace runs -> ","
  final String whitespaceCollapsed = str.replaceAll("[\\s]+", ",");
  // Step 2: remaining non-word runs (including the commas from step 1) -> ","
  final String normalized = whitespaceCollapsed.replaceAll("[\\W]+", ",");
  return normalized;
}
/**
 * Advances the shared {@code timelineServicePort} by one and returns the new value.
 *
 * <p>A different port is used for each individual test so that port reuse does not cause
 * failures. The arithmetic wraps the value back to 1024 once it would pass 65535.
 *
 * @return the updated value of {@code timelineServicePort}.
 */
protected int incrementTimelineServicePortToUse() {
  final int lowestPort = 1024;
  final int portSpan = 65536 - lowestPort;
  // Shift into a zero-based offset, step forward by one, wrap, then shift back.
  final int nextOffset = (timelineServicePort + 1 - lowestPort) % portSpan;
  timelineServicePort = nextOffset + lowestPort;
  return timelineServicePort;
}
}