1
0

[HUDI-1241] Automate the generation of configs webpage as configs are added to Hudi repo (#3302)

This commit is contained in:
rmahindra123
2021-07-23 21:33:34 -07:00
committed by GitHub
parent b2f7fcb8c8
commit a14b19fdd5
24 changed files with 265 additions and 0 deletions

View File

@@ -22,6 +22,8 @@ import org.apache.hudi.client.bootstrap.BootstrapMode;
import org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector;
import org.apache.hudi.client.bootstrap.translator.IdentityBootstrapPartitionPathTranslator;
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.table.HoodieTableConfig;
@@ -35,6 +37,11 @@ import java.util.Properties;
/**
* Bootstrap specific configs.
*/
@ConfigClassProperty(name = "Bootstrap Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control how you want to bootstrap your existing tables for the first time into hudi. "
+ "The bootstrap operation can flexibly avoid copying data over before you can use Hudi and support running the existing "
+ " writers and new hudi writers in parallel, to validate the migration.")
public class HoodieBootstrapConfig extends HoodieConfig {
public static final ConfigProperty<String> BOOTSTRAP_BASE_PATH_PROP = ConfigProperty

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
@@ -29,6 +31,10 @@ import java.util.Properties;
/**
* Clustering specific configs.
*/
@ConfigClassProperty(name = "Clustering Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control the clustering table service in hudi, "
+ "which optimizes the storage layout for better query performance by sorting and sizing data files.")
public class HoodieClusteringConfig extends HoodieConfig {
// Any strategy specific params can be saved with this prefix

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.model.HoodieCleaningPolicy;
@@ -41,6 +43,11 @@ import java.util.stream.Collectors;
* Compaction related config.
*/
@Immutable
@ConfigClassProperty(name = "Compaction Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control compaction "
+ "(merging of log files onto a new base files) as well as "
+ "cleaning (reclamation of older/unused file groups/slices).")
public class HoodieCompactionConfig extends HoodieConfig {
public static final ConfigProperty<String> AUTO_CLEAN_PROP = ConfigProperty

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.index.hbase.DefaultHBaseQPSResourceAllocator;
@@ -27,6 +29,11 @@ import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;
@ConfigClassProperty(name = "HBase Index Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control indexing behavior "
+ "(when HBase based indexing is enabled), which tags incoming "
+ "records as either inserts or updates to older records.")
public class HoodieHBaseIndexConfig extends HoodieConfig {
public static final ConfigProperty<String> HBASE_ZKQUORUM_PROP = ConfigProperty

View File

@@ -19,6 +19,8 @@
package org.apache.hudi.config;
import org.apache.hudi.common.bloom.BloomFilterTypeCode;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.engine.EngineType;
@@ -36,6 +38,10 @@ import java.util.Properties;
* Indexing related config.
*/
@Immutable
@ConfigClassProperty(name = "Index Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control indexing behavior, "
+ "which tags incoming records as either inserts or updates to older records.")
public class HoodieIndexConfig extends HoodieConfig {
public static final ConfigProperty<String> INDEX_TYPE_PROP = ConfigProperty

View File

@@ -20,6 +20,8 @@ package org.apache.hudi.config;
import org.apache.hudi.client.transaction.ConflictResolutionStrategy;
import org.apache.hudi.client.transaction.SimpleConcurrentFileWritesConflictResolutionStrategy;
import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.lock.LockProvider;
@@ -55,6 +57,11 @@ import static org.apache.hudi.common.config.LockConfiguration.ZK_SESSION_TIMEOUT
/**
* Hoodie Configs for Locks.
*/
@ConfigClassProperty(name = "Locks Configurations",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configs that control locking mechanisms required for concurrency control "
+ " between writers to a Hudi table. Concurrency between Hudi's own table services "
+ " are auto managed internally.")
public class HoodieLockConfig extends HoodieConfig {
public static final ConfigProperty<String> LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS_PROP = ConfigProperty

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
@@ -32,6 +34,10 @@ import java.util.Properties;
* Memory related config.
*/
@Immutable
@ConfigClassProperty(name = "Memory Configurations",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Controls memory usage for compaction "
+ "and merges, performed internally by Hudi.")
public class HoodieMemoryConfig extends HoodieConfig {
// Default max memory fraction during hash-merge, excess spills to disk

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.metrics.MetricsReporterType;
@@ -33,6 +35,10 @@ import java.util.Properties;
* Fetch the configurations used by the Metrics system.
*/
@Immutable
@ConfigClassProperty(name = "Metrics Configurations",
groupName = ConfigGroups.Names.METRICS,
description = "Enables reporting on Hudi metrics. Hudi publishes metrics on "
+ "every commit, clean, rollback etc. The following sections list the supported reporters.")
public class HoodieMetricsConfig extends HoodieConfig {
public static final String METRIC_PREFIX = "hoodie.metrics";

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
@@ -33,6 +35,10 @@ import static org.apache.hudi.config.HoodieMetricsConfig.METRIC_PREFIX;
* {@link org.apache.hudi.metrics.MetricsReporterType#DATADOG}
*/
@Immutable
@ConfigClassProperty(name = "Metrics Configurations for Datadog reporter",
groupName = ConfigGroups.Names.METRICS,
description = "Enables reporting on Hudi metrics using the Datadog reporter type. "
+ "Hudi publishes metrics on every commit, clean, rollback etc.")
public class HoodieMetricsDatadogConfig extends HoodieConfig {
public static final String DATADOG_PREFIX = METRIC_PREFIX + ".datadog";

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
@@ -25,6 +27,10 @@ import java.util.Properties;
import static org.apache.hudi.config.HoodieMetricsConfig.METRIC_PREFIX;
@ConfigClassProperty(name = "Metrics Configurations for Prometheus",
groupName = ConfigGroups.Names.METRICS,
description = "Enables reporting on Hudi metrics using Prometheus. "
+ " Hudi publishes metrics on every commit, clean, rollback etc.")
public class HoodieMetricsPrometheusConfig extends HoodieConfig {
// Prometheus PushGateWay

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
@@ -32,6 +34,10 @@ import static org.apache.hudi.common.model.HoodiePayloadProps.PAYLOAD_ORDERING_F
/**
* Hoodie payload related configs.
*/
@ConfigClassProperty(name = "Payload Configurations",
groupName = ConfigGroups.Names.RECORD_PAYLOAD,
description = "Payload related configs, that can be leveraged to "
+ "control merges based on specific business fields in the data.")
public class HoodiePayloadConfig extends HoodieConfig {
public static final ConfigProperty<String> PAYLOAD_ORDERING_FIELD_PROP = ConfigProperty

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
@@ -32,6 +34,9 @@ import java.util.Properties;
* Storage related config.
*/
@Immutable
@ConfigClassProperty(name = "Storage Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control aspects around writing, sizing, reading base and log files.")
public class HoodieStorageConfig extends HoodieConfig {
public static final ConfigProperty<String> PARQUET_FILE_MAX_BYTES = ConfigProperty

View File

@@ -17,6 +17,8 @@
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
@@ -28,6 +30,10 @@ import java.util.Properties;
/**
* Write callback related config.
*/
@ConfigClassProperty(name = "Write commit callback configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Controls callback behavior into HTTP endpoints, to push "
+ " notifications on commits on hudi tables.")
public class HoodieWriteCommitCallbackConfig extends HoodieConfig {
public static final String CALLBACK_PREFIX = "hoodie.write.commit.callback.";

View File

@@ -21,6 +21,8 @@ package org.apache.hudi.config;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.bootstrap.BootstrapMode;
import org.apache.hudi.client.transaction.ConflictResolutionStrategy;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.config.HoodieMetadataConfig;
@@ -68,6 +70,10 @@ import java.util.stream.Collectors;
* Class storing configs for the HoodieWriteClient.
*/
@Immutable
@ConfigClassProperty(name = "Write Configurations",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control write behavior on Hudi tables. These can be directly passed down from even "
+ "higher level frameworks (e.g Spark datasources, Flink sink) and utilities (e.g DeltaStreamer).")
public class HoodieWriteConfig extends HoodieConfig {
private static final long serialVersionUID = 0L;

View File

@@ -18,9 +18,17 @@
package org.apache.hudi.keygen.constant;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
@ConfigClassProperty(name = "Key Generator Options",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Hudi maintains keys (record key + partition path) "
+ "for uniquely identifying a particular record. "
+ "This config allows developers to setup the Key generator class that "
+ "will extract these out of incoming records.")
public class KeyGeneratorOptions extends HoodieConfig {
public static final ConfigProperty<String> URL_ENCODE_PARTITIONING_OPT_KEY = ConfigProperty

View File

@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.config;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Annotation for subclasses of {@link HoodieConfig} that records the
 * human-readable name of the config class, the config group
 * ({@link ConfigGroups.Names}) it belongs to (e.g., spark/flink/write)
 * and the description of the config class.
 *
 * <p>Note: the doc comment must precede the annotations below — the javadoc
 * tool only associates a comment placed before the entire declaration.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface ConfigClassProperty {

  /** Human-readable name of the config class (e.g., "Write Configurations"). */
  String name();

  /** Higher-level group this config class belongs to. */
  ConfigGroups.Names groupName();

  /** Prose description of the config class, rendered on the generated configs page. */
  String description();
}

View File

@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.config;
/**
 * In Hudi, we have multiple subclasses (aka config classes) of {@link HoodieConfig} that maintain
 * several configs. This class groups one or more of these config classes into higher
 * level groups, such as Spark Configs, Flink Configs, Metrics, etc.
 * It maintains the human-readable name and description of each config group.
 */
public class ConfigGroups {
  /**
   * The supported config groups, each carrying the display name shown on the
   * generated configs web page.
   */
  public enum Names {
    SPARK_DATASOURCE("Spark Datasource Configs"),
    FLINK_SQL("Flink Sql Configs"),
    WRITE_CLIENT("Write Client Configs"),
    METRICS("Metrics Configs"),
    RECORD_PAYLOAD("Record Payload Config");

    // Human-readable display name of the group.
    public final String name;

    Names(String name) {
      this.name = name;
    }
  }

  /**
   * Returns the prose description of the given config group, used when
   * generating the configuration documentation page.
   *
   * @param names the config group to describe
   * @return the description text for the group
   */
  public static String getDescription(Names names) {
    String description;
    switch (names) {
      case SPARK_DATASOURCE:
        description = "These configs control the Hudi Spark Datasource, "
            + "providing ability to define keys/partitioning, pick out the write operation, "
            + "specify how to merge records or choosing query type to read.";
        break;
      case FLINK_SQL:
        description = "These configs control the Hudi Flink SQL source/sink connectors, "
            + "providing ability to define record keys, pick out the write operation, "
            + "specify how to merge records, enable/disable asynchronous compaction "
            + "or choosing query type to read.";
        break;
      case WRITE_CLIENT:
        description = "Internally, the Hudi datasource uses a RDD based HoodieWriteClient API "
            + "to actually perform writes to storage. These configs provide deep control over "
            + "lower level aspects like file sizing, compression, parallelism, compaction, "
            + "write schema, cleaning etc. Although Hudi provides sane defaults, from time-time "
            + "these configs may need to be tweaked to optimize for specific workloads.";
        break;
      case RECORD_PAYLOAD:
        description = "This is the lowest level of customization offered by Hudi. "
            + "Record payloads define how to produce new values to upsert based on incoming "
            + "new record and stored old record. Hudi provides default implementations such as "
            + "OverwriteWithLatestAvroPayload which simply update table with the latest/last-written record. "
            + "This can be overridden to a custom class extending HoodieRecordPayload class, "
            + "on both datasource and WriteClient levels.";
        break;
      case METRICS:
        // Bug fix: the original concatenation ("...of key" + "Hudi stats...") rendered
        // the words fused together as "keyHudi"; a separating space is required.
        description = "These set of configs are used to enable monitoring and reporting of key "
            + "Hudi stats and metrics.";
        break;
      default:
        description = "Please fill in the description for Config Group Name: " + names.name;
        break;
    }
    return description;
  }
}

View File

@@ -19,6 +19,7 @@
package org.apache.hudi.common.config;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.exception.HoodieException;
import java.io.Serializable;
@@ -76,6 +77,18 @@ public class ConfigProperty<T> implements Serializable {
return defaultValue != null;
}
/**
 * Returns the documentation string of this config property; returns the empty
 * string (never null) when no doc was set.
 */
public String doc() {
return StringUtils.isNullOrEmpty(doc) ? StringUtils.EMPTY_STRING : doc;
}
/** Returns the Hudi version this property was introduced in, if recorded. */
public Option<String> getSinceVersion() {
return sinceVersion;
}
/** Returns the Hudi version this property was deprecated in, if recorded. */
public Option<String> getDeprecatedVersion() {
return deprecatedVersion;
}
// Package-private: optional function used to infer this property's value
// from other configs when it is not explicitly set.
Option<Function<HoodieConfig, Option<T>>> getInferFunc() {
return inferFunction;
}

View File

@@ -28,6 +28,11 @@ import java.util.Properties;
* Configurations used by the HUDI Metadata Table.
*/
@Immutable
@ConfigClassProperty(name = "Metadata Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations used by the Hudi Metadata Table. "
+ "This table maintains the metadata about a given Hudi table (e.g file listings) "
+ " to avoid overhead of accessing cloud storage, during queries.")
public final class HoodieMetadataConfig extends HoodieConfig {
public static final String METADATA_PREFIX = "hoodie.metadata";

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.common.fs;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
@@ -29,6 +31,10 @@ import java.util.Properties;
/**
* The consistency guard relevant config options.
*/
@ConfigClassProperty(name = "Consistency Guard Configurations",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "The consistency guard related config options, to help talk to eventually consistent object storage."
+ "(Tip: S3 is NOT eventually consistent anymore!)")
public class ConsistencyGuardConfig extends HoodieConfig {
public static final ConfigProperty<String> CONSISTENCY_CHECK_ENABLED_PROP = ConfigProperty

View File

@@ -20,6 +20,8 @@ package org.apache.hudi.common.table;
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
import org.apache.hudi.common.bootstrap.index.NoOpBootstrapIndex;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.model.HoodieFileFormat;
@@ -53,6 +55,14 @@ import java.util.stream.Collectors;
* @see HoodieTableMetaClient
* @since 0.3.0
*/
@ConfigClassProperty(name = "Table Configurations",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that persist across writes and read on a Hudi table "
+ " like base, log file formats, table name, creation schema, table version layouts. "
+ " Configurations are loaded from hoodie.properties, these properties are usually set during "
+ "initializing a path as hoodie base path and rarely changes during "
+ "the lifetime of the table. Writers/Queries' configurations are validated against these "
+ " each time for compatibility.")
public class HoodieTableConfig extends HoodieConfig implements Serializable {
private static final Logger LOG = LogManager.getLogger(HoodieTableConfig.class);

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.common.table.view;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.util.ValidationUtils;
@@ -32,6 +34,9 @@ import java.util.stream.Collectors;
/**
* File System View Storage Configurations.
*/
@ConfigClassProperty(name = "File System View Storage Configurations",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control how file metadata is stored by Hudi, for transaction processing and queries.")
public class FileSystemViewStorageConfig extends HoodieConfig {
// Property Names

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.configuration;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
@@ -44,6 +46,10 @@ import java.util.Set;
*
* <p>It has the options for Hoodie table read and write. It also defines some utilities.
*/
@ConfigClassProperty(name = "Flink Options",
groupName = ConfigGroups.Names.FLINK_SQL,
description = "Flink jobs using the SQL can be configured through the options in WITH clause."
+ " The actual datasource level configs are listed below.")
public class FlinkOptions extends HoodieConfig {
private FlinkOptions() {
}

View File

@@ -17,6 +17,8 @@
package org.apache.hudi.utilities.callback.kafka;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
@@ -25,6 +27,9 @@ import static org.apache.hudi.config.HoodieWriteCommitCallbackConfig.CALLBACK_PR
/**
* Kafka write callback related config.
*/
@ConfigClassProperty(name = "Write commit Kafka callback configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Controls notifications sent to Kafka, on events happening to a hudi table.")
public class HoodieWriteCommitKafkaCallbackConfig extends HoodieConfig {
public static final ConfigProperty<String> CALLBACK_KAFKA_BOOTSTRAP_SERVERS = ConfigProperty