1
0

[HUDI-3730] Improve meta sync class design and hierarchies (#5854)

* [HUDI-3730] Improve meta sync class design and hierarchies (#5754)
* Implements class design proposed in RFC-55

Co-authored-by: jian.feng <fengjian428@gmial.com>
Co-authored-by: jian.feng <jian.feng@shopee.com>
This commit is contained in:
Shiyan Xu
2022-07-03 04:17:25 -05:00
committed by GitHub
parent c00ea84985
commit c0e1587966
86 changed files with 2977 additions and 2877 deletions

View File

@@ -1,276 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sync.common;
import java.io.Serializable;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.TimelineUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.schema.MessageType;
/**
 * Base client for syncing a Hudi table's metadata into an external metastore (e.g. Hive).
 *
 * <p>Holds the table's {@link HoodieTableMetaClient} and declares the catalog operations
 * (create table, partition add/update/drop, schema and last-sync-time bookkeeping) that
 * concrete metastore clients implement.
 *
 * <p>NOTE(review): this class is superseded by the RFC-55 design ({@code HoodieSyncClient}
 * / {@code HoodieMetaSyncOperations}) — presumably retained for backward compatibility;
 * confirm before adding new functionality here.
 */
public abstract class AbstractSyncHoodieClient implements AutoCloseable {
  private static final Logger LOG = LogManager.getLogger(AbstractSyncHoodieClient.class);
  // Table property key under which the last synced commit time is stored in the metastore.
  public static final String HOODIE_LAST_COMMIT_TIME_SYNC = "last_commit_time_sync";
  // Shared default converter instance (note: identifier spelling "CONVERTOR" is public API; do not rename).
  public static final TypeConverter TYPE_CONVERTOR = new TypeConverter() {};
  // Meta client for the Hudi table being synced; loads the active timeline eagerly.
  protected final HoodieTableMetaClient metaClient;
  // Table type (COPY_ON_WRITE / MERGE_ON_READ) read from the table config.
  protected final HoodieTableType tableType;
  // File system the table's base path lives on.
  protected final FileSystem fs;
  private final String basePath;
  private final boolean assumeDatePartitioning;
  private final boolean useFileListingFromMetadata;
  private final boolean withOperationField;

  /**
   * @deprecated The {@code verifyMetadataFileListing} argument is ignored; use the
   *             constructor without it.
   */
  @Deprecated
  public AbstractSyncHoodieClient(String basePath, boolean assumeDatePartitioning, boolean useFileListingFromMetadata,
                                  boolean verifyMetadataFileListing, boolean withOperationField, FileSystem fs) {
    this(basePath, assumeDatePartitioning, useFileListingFromMetadata, withOperationField, fs);
  }

  /**
   * @param basePath                   base path of the Hudi table to sync
   * @param assumeDatePartitioning    assume yyyy/mm/dd partitioning when listing partitions
   * @param useFileListingFromMetadata fetch file listing from Hudi's metadata table
   * @param withOperationField        whether the table writes the operation meta field
   * @param fs                        file system hosting the table
   */
  public AbstractSyncHoodieClient(String basePath, boolean assumeDatePartitioning, boolean useFileListingFromMetadata,
                                  boolean withOperationField, FileSystem fs) {
    this.metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
    this.tableType = metaClient.getTableType();
    this.basePath = basePath;
    this.assumeDatePartitioning = assumeDatePartitioning;
    this.useFileListingFromMetadata = useFileListingFromMetadata;
    this.withOperationField = withOperationField;
    this.fs = fs;
  }

  /**
   * Create the table.
   * @param tableName The table name.
   * @param storageSchema The table schema.
   * @param inputFormatClass The input format class of this table.
   * @param outputFormatClass The output format class of this table.
   * @param serdeClass The serde class of this table.
   * @param serdeProperties The serde properties of this table.
   * @param tableProperties The table properties for this table.
   */
  public abstract void createTable(String tableName, MessageType storageSchema,
                                   String inputFormatClass, String outputFormatClass,
                                   String serdeClass, Map<String, String> serdeProperties,
                                   Map<String, String> tableProperties);

  /**
   * @deprecated Use {@link #tableExists} instead.
   */
  @Deprecated
  public abstract boolean doesTableExist(String tableName);

  /** @return true if the named table exists in the metastore. */
  public abstract boolean tableExists(String tableName);

  /** @return the last commit time recorded in the metastore for this table, if any. */
  public abstract Option<String> getLastCommitTimeSynced(String tableName);

  /** Record the table's latest commit time in the metastore. */
  public abstract void updateLastCommitTimeSynced(String tableName);

  /** @return the last replication timestamp recorded for this table, if any. */
  public abstract Option<String> getLastReplicatedTime(String tableName);

  /** Record the given replication timestamp for this table. */
  public abstract void updateLastReplicatedTimeStamp(String tableName, String timeStamp);

  /** Remove the replication timestamp recorded for this table. */
  public abstract void deleteLastReplicatedTimeStamp(String tableName);

  /** Register new partitions for the table in the metastore. */
  public abstract void addPartitionsToTable(String tableName, List<String> partitionsToAdd);

  /** Update changed partitions for the table in the metastore. */
  public abstract void updatePartitionsToTable(String tableName, List<String> changedPartitions);

  /** Drop the given partitions from the table in the metastore. */
  public abstract void dropPartitions(String tableName, List<String> partitionsToDrop);

  /** Update table-level properties in the metastore; no-op by default. */
  public void updateTableProperties(String tableName, Map<String, String> tableProperties) {}

  /** @return the table's schema as known to the metastore (column name -> type). */
  public abstract Map<String, String> getTableSchema(String tableName);

  public HoodieTableType getTableType() {
    return tableType;
  }

  public String getBasePath() {
    return metaClient.getBasePath();
  }

  public FileSystem getFs() {
    return fs;
  }

  /** @return true if the table was bootstrapped from an existing dataset. */
  public boolean isBootstrap() {
    return metaClient.getTableConfig().getBootstrapBasePath().isPresent();
  }

  /**
   * Close the given JDBC statement and result set, logging (never throwing) on failure.
   * Either argument may be null.
   */
  public void closeQuietly(ResultSet resultSet, Statement stmt) {
    try {
      if (stmt != null) {
        stmt.close();
      }
    } catch (SQLException e) {
      LOG.warn("Could not close the statement opened ", e);
    }
    try {
      if (resultSet != null) {
        resultSet.close();
      }
    } catch (SQLException e) {
      LOG.warn("Could not close the resultset opened ", e);
    }
  }

  /**
   * Gets the schema for a hoodie table. Depending on the type of table, try to read schema from commit metadata if
   * present, else fallback to reading from any file written in the latest commit. We will assume that the schema has
   * not changed within a single atomic write.
   *
   * @return Parquet schema for this table
   */
  public MessageType getDataSchema() {
    try {
      return new TableSchemaResolver(metaClient).getTableParquetSchema();
    } catch (Exception e) {
      throw new HoodieSyncException("Failed to read data schema", e);
    }
  }

  /** @return true if the latest commit on the table was a DELETE_PARTITION operation. */
  public boolean isDropPartition() {
    try {
      Option<HoodieCommitMetadata> hoodieCommitMetadata = HoodieTableMetadataUtil.getLatestCommitMetadata(metaClient);
      if (hoodieCommitMetadata.isPresent()
          && WriteOperationType.DELETE_PARTITION.equals(hoodieCommitMetadata.get().getOperationType())) {
        return true;
      }
    } catch (Exception e) {
      throw new HoodieSyncException("Failed to get commit metadata", e);
    }
    return false;
  }

  /**
   * Partitions written since the given commit time. When the last synced time is
   * unknown, falls back to listing every partition under the base path.
   *
   * @param lastCommitTimeSynced last commit time synced to the metastore, if known
   * @return partition paths written after {@code lastCommitTimeSynced}
   */
  @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
  public List<String> getPartitionsWrittenToSince(Option<String> lastCommitTimeSynced) {
    if (!lastCommitTimeSynced.isPresent()) {
      LOG.info("Last commit time synced is not known, listing all partitions in " + basePath + ",FS :" + fs);
      HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
      return FSUtils.getAllPartitionPaths(engineContext, basePath, useFileListingFromMetadata, assumeDatePartitioning);
    } else {
      LOG.info("Last commit time synced is " + lastCommitTimeSynced.get() + ", Getting commits since then");
      return TimelineUtils.getPartitionsWritten(metaClient.getActiveTimeline().getCommitsTimeline()
          .findInstantsAfter(lastCommitTimeSynced.get(), Integer.MAX_VALUE));
    }
  }

  /**
   * Converts JDBC column-metadata rows into metastore type strings, expanding
   * the target type (DECIMAL by default) to "TYPE(size,digits)".
   *
   * <p>NOTE(review): {@link #getColumnName} / {@link #getColumnType} read result-set
   * columns 4 and 6, which matches the JDBC {@code DatabaseMetaData.getColumns}
   * layout (4 = COLUMN_NAME, 6 = TYPE_NAME) — confirm callers pass such a result set.
   */
  public abstract static class TypeConverter implements Serializable {
    static final String DEFAULT_TARGET_TYPE = "DECIMAL";
    protected String targetType;

    /** Uses the default target type ({@value #DEFAULT_TARGET_TYPE}). */
    public TypeConverter() {
      this.targetType = DEFAULT_TARGET_TYPE;
    }

    /** @param targetType type name whose columns get "(size,digits)" appended; must be non-null. */
    public TypeConverter(String targetType) {
      ValidationUtils.checkArgument(Objects.nonNull(targetType));
      this.targetType = targetType;
    }

    /** Put the current row's column name and (possibly expanded) type into {@code schema}. */
    public void doConvert(ResultSet resultSet, Map<String, String> schema) throws SQLException {
      schema.put(getColumnName(resultSet), targetType.equalsIgnoreCase(getColumnType(resultSet))
          ? convert(resultSet) : getColumnType(resultSet));
    }

    /** @return the column type with precision/scale appended, e.g. "DECIMAL(10,2)". */
    public String convert(ResultSet resultSet) throws SQLException {
      String columnType = getColumnType(resultSet);
      int columnSize = resultSet.getInt("COLUMN_SIZE");
      int decimalDigits = resultSet.getInt("DECIMAL_DIGITS");
      return columnType + String.format("(%s,%s)", columnSize, decimalDigits);
    }

    public String getColumnName(ResultSet resultSet) throws SQLException {
      return resultSet.getString(4);
    }

    public String getColumnType(ResultSet resultSet) throws SQLException {
      return resultSet.getString(6);
    }
  }

  /**
   * Read the schema from the log file on path.
   *
   * <p>NOTE(review): private and not referenced anywhere in this class — possibly dead
   * code left over from an older sync path; verify before deleting.
   */
  @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
  private MessageType readSchemaFromLogFile(Option<HoodieInstant> lastCompactionCommitOpt, Path path) throws Exception {
    MessageType messageType = TableSchemaResolver.readSchemaFromLogFile(fs, path);
    // Fall back to read the schema from last compaction
    if (messageType == null) {
      LOG.info("Falling back to read the schema from last compaction " + lastCompactionCommitOpt);
      return new TableSchemaResolver(this.metaClient).readSchemaFromLastCompaction(lastCompactionCommitOpt);
    }
    return messageType;
  }

  /**
   * Partition Event captures any partition that needs to be added or updated.
   */
  public static class PartitionEvent {
    public enum PartitionEventType {
      ADD, UPDATE, DROP
    }

    // What should happen to the partition in the metastore.
    public PartitionEventType eventType;
    // Relative partition path on storage.
    public String storagePartition;

    PartitionEvent(PartitionEventType eventType, String storagePartition) {
      this.eventType = eventType;
      this.storagePartition = storagePartition;
    }

    public static PartitionEvent newPartitionAddEvent(String storagePartition) {
      return new PartitionEvent(PartitionEventType.ADD, storagePartition);
    }

    public static PartitionEvent newPartitionUpdateEvent(String storagePartition) {
      return new PartitionEvent(PartitionEventType.UPDATE, storagePartition);
    }

    public static PartitionEvent newPartitionDropEvent(String storagePartition) {
      return new PartitionEvent(PartitionEventType.DROP, storagePartition);
    }
  }
}

View File

@@ -0,0 +1,196 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.sync.common;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.sync.common.model.FieldSchema;
import org.apache.hudi.sync.common.model.Partition;
import org.apache.parquet.schema.MessageType;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
 * Metastore-agnostic catalog operations needed for Hudi meta sync (RFC-55).
 *
 * <p>Every method has a no-op or empty default so implementations only override
 * the operations their metastore actually supports.
 */
public interface HoodieMetaSyncOperations {

  /** Table property key under which the last synced commit time is stored. */
  String HOODIE_LAST_COMMIT_TIME_SYNC = "last_commit_time_sync";

  /**
   * Create the table.
   *
   * @param tableName The table name.
   * @param storageSchema The table schema.
   * @param inputFormatClass The input format class of this table.
   * @param outputFormatClass The output format class of this table.
   * @param serdeClass The serde class of this table.
   * @param serdeProperties The serde properties of this table.
   * @param tableProperties The table properties for this table.
   */
  default void createTable(String tableName,
                           MessageType storageSchema,
                           String inputFormatClass,
                           String outputFormatClass,
                           String serdeClass,
                           Map<String, String> serdeProperties,
                           Map<String, String> tableProperties) {
  }

  /**
   * Check if table exists in metastore.
   *
   * @return true if the table exists; default implementation always returns false.
   */
  default boolean tableExists(String tableName) {
    return false;
  }

  /**
   * Drop table from metastore.
   */
  default void dropTable(String tableName) {
  }

  /**
   * Add partitions to the table in metastore.
   *
   * @param partitionsToAdd relative partition paths on storage to register
   */
  default void addPartitionsToTable(String tableName, List<String> partitionsToAdd) {
  }

  /**
   * Update partitions to the table in metastore.
   *
   * @param changedPartitions relative partition paths whose locations changed
   */
  default void updatePartitionsToTable(String tableName, List<String> changedPartitions) {
  }

  /**
   * Drop partitions from the table in metastore.
   *
   * @param partitionsToDrop relative partition paths to remove
   */
  default void dropPartitions(String tableName, List<String> partitionsToDrop) {
  }

  /**
   * Get all partitions for the table in the metastore.
   *
   * @return partitions known to the metastore; empty list by default
   */
  default List<Partition> getAllPartitions(String tableName) {
    return Collections.emptyList();
  }

  /**
   * Check if a database already exists in the metastore.
   */
  default boolean databaseExists(String databaseName) {
    return false;
  }

  /**
   * Create a database in the metastore.
   */
  default void createDatabase(String databaseName) {
  }

  /**
   * Get the schema from metastore.
   *
   * @return column name to type mapping; empty map by default
   */
  default Map<String, String> getMetastoreSchema(String tableName) {
    return Collections.emptyMap();
  }

  /**
   * Get the schema from the Hudi table on storage.
   *
   * @return the Parquet schema, or null by default
   */
  default MessageType getStorageSchema() {
    return null;
  }

  /**
   * Update schema for the table in the metastore.
   */
  default void updateTableSchema(String tableName, MessageType newSchema) {
  }

  /**
   * Get the list of field schemas from metastore.
   */
  default List<FieldSchema> getMetastoreFieldSchemas(String tableName) {
    return Collections.emptyList();
  }

  /**
   * Get the list of field schema from the Hudi table on storage.
   */
  default List<FieldSchema> getStorageFieldSchemas() {
    return Collections.emptyList();
  }

  /**
   * Update the field comments for table in metastore, by using the ones from storage.
   *
   * @param fromMetastore field schemas currently in the metastore
   * @param fromStorage   field schemas read from the table on storage
   */
  default void updateTableComments(String tableName, List<FieldSchema> fromMetastore, List<FieldSchema> fromStorage) {
  }

  /**
   * Get the timestamp of last sync.
   */
  default Option<String> getLastCommitTimeSynced(String tableName) {
    return Option.empty();
  }

  /**
   * Update the timestamp of last sync.
   */
  default void updateLastCommitTimeSynced(String tableName) {
  }

  /**
   * Update the table properties in metastore.
   */
  default void updateTableProperties(String tableName, Map<String, String> tableProperties) {
  }

  /**
   * Get the timestamp of last replication.
   */
  default Option<String> getLastReplicatedTime(String tableName) {
    return Option.empty();
  }

  /**
   * Update the timestamp of last replication.
   */
  default void updateLastReplicatedTimeStamp(String tableName, String timeStamp) {
  }

  /**
   * Delete the timestamp of last replication.
   */
  default void deleteLastReplicatedTimeStamp(String tableName) {
  }
}

View File

@@ -0,0 +1,161 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sync.common;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import org.apache.hudi.sync.common.model.Partition;
import org.apache.hudi.sync.common.model.PartitionEvent;
import org.apache.hudi.sync.common.model.PartitionValueExtractor;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.schema.MessageType;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_USE_FILE_LISTING_FROM_METADATA;
/**
 * Base implementation of {@link HoodieMetaSyncOperations} that reads table state
 * (timeline, schema, partitions) from storage through a {@link HoodieTableMetaClient}
 * built from the supplied {@link HoodieSyncConfig}.
 */
public abstract class HoodieSyncClient implements HoodieMetaSyncOperations, AutoCloseable {

  private static final Logger LOG = LogManager.getLogger(HoodieSyncClient.class);

  protected final HoodieSyncConfig config;
  protected final PartitionValueExtractor partitionValueExtractor;
  protected final HoodieTableMetaClient metaClient;

  /**
   * @param config sync configuration; supplies the Hadoop conf, base path and the
   *               partition value extractor class to instantiate reflectively
   */
  public HoodieSyncClient(HoodieSyncConfig config) {
    this.config = config;
    this.partitionValueExtractor = ReflectionUtils.loadClass(config.getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS));
    this.metaClient = HoodieTableMetaClient.builder()
        .setConf(config.getHadoopConf())
        .setBasePath(config.getString(META_SYNC_BASE_PATH))
        .setLoadActiveTimelineOnLoad(true)
        .build();
  }

  /** @return completed commits on the table's active timeline. */
  public HoodieTimeline getActiveTimeline() {
    return metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
  }

  public HoodieTableType getTableType() {
    return metaClient.getTableType();
  }

  public String getBasePath() {
    return metaClient.getBasePathV2().toString();
  }

  /** @return true if the table was bootstrapped from an existing dataset. */
  public boolean isBootstrap() {
    return metaClient.getTableConfig().getBootstrapBasePath().isPresent();
  }

  /** @return true if the latest commit on the table was a DELETE_PARTITION operation. */
  public boolean isDropPartition() {
    try {
      Option<HoodieCommitMetadata> latestCommitMeta = HoodieTableMetadataUtil.getLatestCommitMetadata(metaClient);
      return latestCommitMeta.isPresent()
          && WriteOperationType.DELETE_PARTITION.equals(latestCommitMeta.get().getOperationType());
    } catch (Exception e) {
      throw new HoodieSyncException("Failed to get commit metadata", e);
    }
  }

  @Override
  public MessageType getStorageSchema() {
    try {
      return new TableSchemaResolver(metaClient).getTableParquetSchema();
    } catch (Exception e) {
      throw new HoodieSyncException("Failed to read schema from storage.", e);
    }
  }

  /**
   * Partitions written since the given commit time; when the last synced time is
   * unknown, falls back to listing every partition under the base path.
   *
   * @param lastCommitTimeSynced last commit time recorded in the metastore, if known
   */
  public List<String> getPartitionsWrittenToSince(Option<String> lastCommitTimeSynced) {
    if (lastCommitTimeSynced.isPresent()) {
      LOG.info("Last commit time synced is " + lastCommitTimeSynced.get() + ", Getting commits since then");
      return TimelineUtils.getPartitionsWritten(metaClient.getActiveTimeline().getCommitsTimeline()
          .findInstantsAfter(lastCommitTimeSynced.get(), Integer.MAX_VALUE));
    }
    LOG.info("Last commit time synced is not known, listing all partitions in "
        + config.getString(META_SYNC_BASE_PATH)
        + ",FS :" + config.getHadoopFileSystem());
    HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
    return FSUtils.getAllPartitionPaths(engineContext,
        config.getString(META_SYNC_BASE_PATH),
        config.getBoolean(META_SYNC_USE_FILE_LISTING_FROM_METADATA),
        config.getBoolean(META_SYNC_ASSUME_DATE_PARTITION));
  }

  /**
   * Iterate over the storage partitions and find if there are any new partitions that need to be added or updated.
   * Generate a list of PartitionEvent based on the changes required.
   *
   * @param tablePartitions           partitions currently registered in the metastore
   * @param partitionStoragePartitions relative partition paths found on storage
   * @param isDropPartition           when true, every storage partition becomes a DROP event
   */
  public List<PartitionEvent> getPartitionEvents(List<Partition> tablePartitions, List<String> partitionStoragePartitions, boolean isDropPartition) {
    // Metastore partition locations (scheme/authority stripped), keyed by joined partition values.
    Map<String, String> metastorePathByValues = new HashMap<>();
    for (Partition metastorePartition : tablePartitions) {
      String normalizedLocation =
          Path.getPathWithoutSchemeAndAuthority(new Path(metastorePartition.getStorageLocation())).toUri().getPath();
      metastorePathByValues.put(String.join(", ", metastorePartition.getValues()), normalizedLocation);
    }
    List<PartitionEvent> partitionEvents = new ArrayList<>();
    for (String storagePartition : partitionStoragePartitions) {
      Path storagePartitionPath = FSUtils.getPartitionPath(config.getString(META_SYNC_BASE_PATH), storagePartition);
      String fullStoragePartitionPath = Path.getPathWithoutSchemeAndAuthority(storagePartitionPath).toUri().getPath();
      List<String> storagePartitionValues = partitionValueExtractor.extractPartitionValuesInPath(storagePartition);
      if (isDropPartition) {
        partitionEvents.add(PartitionEvent.newPartitionDropEvent(storagePartition));
        continue;
      }
      if (storagePartitionValues.isEmpty()) {
        continue;
      }
      // Unknown partition values => ADD; known values at a different location => UPDATE.
      String valuesKey = String.join(", ", storagePartitionValues);
      if (!metastorePathByValues.containsKey(valuesKey)) {
        partitionEvents.add(PartitionEvent.newPartitionAddEvent(storagePartition));
      } else if (!metastorePathByValues.get(valuesKey).equals(fullStoragePartitionPath)) {
        partitionEvents.add(PartitionEvent.newPartitionUpdateEvent(storagePartition));
      }
    }
    return partitionEvents;
  }
}

View File

@@ -22,14 +22,19 @@ import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
import org.apache.hudi.sync.common.util.ConfigUtils;
import com.beust.jcommander.Parameter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.function.Function;
/**
@@ -37,41 +42,6 @@ import java.util.function.Function;
*/
public class HoodieSyncConfig extends HoodieConfig {
@Parameter(names = {"--database"}, description = "name of the target database in meta store", required = true)
public String databaseName;
@Parameter(names = {"--table"}, description = "name of the target table in meta store", required = true)
public String tableName;
@Parameter(names = {"--base-path"}, description = "Base path of the hoodie table to sync", required = true)
public String basePath;
@Parameter(names = {"--base-file-format"}, description = "Format of the base files (PARQUET (or) HFILE)")
public String baseFileFormat;
@Parameter(names = "--partitioned-by", description = "Fields in the schema partitioned by")
public List<String> partitionFields;
@Parameter(names = "--partition-value-extractor", description = "Class which implements PartitionValueExtractor "
+ "to extract the partition values from HDFS path")
public String partitionValueExtractorClass;
@Parameter(names = {"--assume-date-partitioning"}, description = "Assume standard yyyy/mm/dd partitioning, this"
+ " exists to support backward compatibility. If you use hoodie 0.3.x, do not set this parameter")
public Boolean assumeDatePartitioning;
@Parameter(names = {"--decode-partition"}, description = "Decode the partition value if the partition has encoded during writing")
public Boolean decodePartition;
@Parameter(names = {"--use-file-listing-from-metadata"}, description = "Fetch file listing from Hudi's metadata")
public Boolean useFileListingFromMetadata;
@Parameter(names = {"--conditional-sync"}, description = "If true, only sync on conditions like schema change or partition change.")
public Boolean isConditionalSync;
@Parameter(names = {"--spark-version"}, description = "The spark version")
public String sparkVersion;
public static final ConfigProperty<String> META_SYNC_BASE_PATH = ConfigProperty
.key("hoodie.datasource.meta.sync.base.path")
.defaultValue("")
@@ -150,6 +120,11 @@ public class HoodieSyncConfig extends HoodieConfig {
.defaultValue("false")
.withDocumentation("Assume partitioning is yyyy/mm/dd");
public static final ConfigProperty<Boolean> META_SYNC_DECODE_PARTITION = ConfigProperty
.key("hoodie.meta.sync.decode_partition")
.defaultValue(false) // TODO infer from url encode option
.withDocumentation("");
public static final ConfigProperty<Boolean> META_SYNC_USE_FILE_LISTING_FROM_METADATA = ConfigProperty
.key("hoodie.meta.sync.metadata_file_listing")
.defaultValue(HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS)
@@ -165,24 +140,85 @@ public class HoodieSyncConfig extends HoodieConfig {
.defaultValue("")
.withDocumentation("The spark version used when syncing with a metastore.");
public HoodieSyncConfig(TypedProperties props) {
super(props);
setDefaults();
private Configuration hadoopConf;
this.basePath = getStringOrDefault(META_SYNC_BASE_PATH);
this.databaseName = getStringOrDefault(META_SYNC_DATABASE_NAME);
this.tableName = getStringOrDefault(META_SYNC_TABLE_NAME);
this.baseFileFormat = getStringOrDefault(META_SYNC_BASE_FILE_FORMAT);
this.partitionFields = props.getStringList(META_SYNC_PARTITION_FIELDS.key(), ",", Collections.emptyList());
this.partitionValueExtractorClass = getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS);
this.assumeDatePartitioning = getBooleanOrDefault(META_SYNC_ASSUME_DATE_PARTITION);
this.decodePartition = getBooleanOrDefault(KeyGeneratorOptions.URL_ENCODE_PARTITIONING);
this.useFileListingFromMetadata = getBooleanOrDefault(META_SYNC_USE_FILE_LISTING_FROM_METADATA);
this.isConditionalSync = getBooleanOrDefault(META_SYNC_CONDITIONAL_SYNC);
this.sparkVersion = getStringOrDefault(META_SYNC_SPARK_VERSION);
public HoodieSyncConfig(Properties props) {
this(props, ConfigUtils.createHadoopConf(props));
}
protected void setDefaults() {
this.setDefaultValue(META_SYNC_TABLE_NAME);
public HoodieSyncConfig(Properties props, Configuration hadoopConf) {
super(props);
this.hadoopConf = hadoopConf;
}
public void setHadoopConf(Configuration hadoopConf) {
this.hadoopConf = hadoopConf;
}
public Configuration getHadoopConf() {
return hadoopConf;
}
public FileSystem getHadoopFileSystem() {
return FSUtils.getFs(getString(META_SYNC_BASE_PATH), getHadoopConf());
}
public String getAbsoluteBasePath() {
return getString(META_SYNC_BASE_PATH);
}
@Override
public String toString() {
return props.toString();
}
/**
 * Command-line parameter holder for meta sync tools. Each field mirrors one of the
 * META_SYNC_* config properties; {@link #toProps()} converts the parsed arguments
 * into {@link TypedProperties}, skipping any argument the user did not supply.
 */
public static class HoodieSyncConfigParams {
  @Parameter(names = {"--database"}, description = "name of the target database in meta store", required = true)
  public String databaseName;
  @Parameter(names = {"--table"}, description = "name of the target table in meta store", required = true)
  public String tableName;
  @Parameter(names = {"--base-path"}, description = "Base path of the hoodie table to sync", required = true)
  public String basePath;
  @Parameter(names = {"--base-file-format"}, description = "Format of the base files (PARQUET (or) HFILE)")
  public String baseFileFormat;
  @Parameter(names = "--partitioned-by", description = "Fields in the schema partitioned by")
  public List<String> partitionFields;
  @Parameter(names = "--partition-value-extractor", description = "Class which implements PartitionValueExtractor "
      + "to extract the partition values from HDFS path")
  public String partitionValueExtractorClass;
  @Parameter(names = {"--assume-date-partitioning"}, description = "Assume standard yyyy/mm/dd partitioning, this"
      + " exists to support backward compatibility. If you use hoodie 0.3.x, do not set this parameter")
  public Boolean assumeDatePartitioning;
  @Parameter(names = {"--decode-partition"}, description = "Decode the partition value if the partition has encoded during writing")
  public Boolean decodePartition;
  @Parameter(names = {"--use-file-listing-from-metadata"}, description = "Fetch file listing from Hudi's metadata")
  public Boolean useFileListingFromMetadata;
  @Parameter(names = {"--conditional-sync"}, description = "If true, only sync on conditions like schema change or partition change.")
  public Boolean isConditionalSync;
  @Parameter(names = {"--spark-version"}, description = "The spark version")
  public String sparkVersion;
  @Parameter(names = {"--help", "-h"}, help = true)
  public boolean help = false;

  public boolean isHelp() {
    return help;
  }

  /**
   * Convert the parsed CLI arguments into typed properties keyed by the
   * corresponding META_SYNC_* config keys; null (unset) arguments are omitted.
   */
  public TypedProperties toProps() {
    final TypedProperties props = new TypedProperties();
    props.setPropertyIfNonNull(META_SYNC_BASE_PATH.key(), basePath);
    props.setPropertyIfNonNull(META_SYNC_DATABASE_NAME.key(), databaseName);
    props.setPropertyIfNonNull(META_SYNC_TABLE_NAME.key(), tableName);
    props.setPropertyIfNonNull(META_SYNC_BASE_FILE_FORMAT.key(), baseFileFormat);
    props.setPropertyIfNonNull(META_SYNC_PARTITION_FIELDS.key(), StringUtils.join(",", partitionFields));
    props.setPropertyIfNonNull(META_SYNC_PARTITION_EXTRACTOR_CLASS.key(), partitionValueExtractorClass);
    props.setPropertyIfNonNull(META_SYNC_ASSUME_DATE_PARTITION.key(), assumeDatePartitioning);
    props.setPropertyIfNonNull(META_SYNC_DECODE_PARTITION.key(), decodePartition);
    props.setPropertyIfNonNull(META_SYNC_USE_FILE_LISTING_FROM_METADATA.key(), useFileListingFromMetadata);
    props.setPropertyIfNonNull(META_SYNC_CONDITIONAL_SYNC.key(), isConditionalSync);
    props.setPropertyIfNonNull(META_SYNC_SPARK_VERSION.key(), sparkVersion);
    return props;
  }
}
}

View File

@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sync.common;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.sync.common.util.ConfigUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.util.Properties;
/**
 * Base class to sync metadata with metastores to make
 * Hudi table queryable through external systems.
 */
public abstract class HoodieSyncTool implements AutoCloseable {

  // Raw sync configuration supplied by the caller.
  protected Properties props;
  // Hadoop configuration used to access the table's file system.
  protected Configuration hadoopConf;

  /**
   * @param props sync properties; a Hadoop configuration is derived from them.
   */
  public HoodieSyncTool(Properties props) {
    this(props, ConfigUtils.createHadoopConf(props));
  }

  /**
   * @param props      sync properties
   * @param hadoopConf Hadoop configuration for file-system access
   */
  public HoodieSyncTool(Properties props, Configuration hadoopConf) {
    this.props = props;
    this.hadoopConf = hadoopConf;
  }

  /**
   * @deprecated The {@code fs} argument is ignored; use {@link #HoodieSyncTool(Properties, Configuration)}.
   */
  @Deprecated
  public HoodieSyncTool(TypedProperties props, Configuration conf, FileSystem fs) {
    this(props, conf);
  }

  /**
   * @deprecated Use {@link #HoodieSyncTool(Properties, Configuration)}; only the
   *             file system's configuration is retained.
   */
  @Deprecated
  public HoodieSyncTool(Properties props, FileSystem fileSystem) {
    this(props, fileSystem.getConf());
  }

  /** Perform one sync of the Hudi table's metadata into the target metastore. */
  public abstract void syncHoodieTable();

  @Override
  public void close() throws Exception {
    // no op
  }
}

View File

@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.sync.common.model;
import org.apache.hudi.common.util.Option;
import java.util.Objects;
/**
 * A single column of a table schema as seen by a metastore: an immutable
 * name, a mutable type string, and an optional comment.
 */
public class FieldSchema {

  private final String name;
  private String type;
  private Option<String> comment;

  /**
   * Creates a field with no comment.
   */
  public FieldSchema(String name, String type) {
    this(name, type, Option.empty());
  }

  /**
   * Creates a field from a nullable comment string; {@code null} maps to
   * an absent comment.
   */
  public FieldSchema(String name, String type, String comment) {
    this(name, type, Option.ofNullable(comment));
  }

  /**
   * Canonical constructor; the other constructors delegate here.
   */
  public FieldSchema(String name, String type, Option<String> comment) {
    this.name = name;
    this.type = type;
    this.comment = comment;
  }

  public String getName() {
    return name;
  }

  public String getType() {
    return type;
  }

  public Option<String> getComment() {
    return comment;
  }

  /**
   * @return the comment, or the empty string when absent
   */
  public String getCommentOrEmpty() {
    return comment.orElse("");
  }

  public void setType(String type) {
    this.type = type;
  }

  public void setComment(Option<String> comment) {
    this.comment = comment;
  }

  public void setComment(String comment) {
    this.comment = Option.ofNullable(comment);
  }

  /**
   * Copies the comment from {@code other} when both fields share the same
   * name but their (empty-normalized) comments differ.
   *
   * @param other field to take the comment from
   * @return true when this field's comment was changed
   */
  public boolean updateComment(FieldSchema other) {
    if (!Objects.equals(name, other.getName())) {
      return false;
    }
    if (Objects.equals(getCommentOrEmpty(), other.getCommentOrEmpty())) {
      return false;
    }
    setComment(other.getComment());
    return true;
  }
}

View File

@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.sync.common.model;
/**
 * Describes one pending change to a metastore partition: the partition's
 * storage path plus whether it must be added, updated, or dropped.
 * Instances are created via the static factory methods.
 */
public class PartitionEvent {

  /** The kind of change to apply to the partition. */
  public enum PartitionEventType {
    ADD, UPDATE, DROP
  }

  public PartitionEventType eventType;
  public String storagePartition;

  PartitionEvent(PartitionEventType type, String partition) {
    this.eventType = type;
    this.storagePartition = partition;
  }

  /** Event signalling that {@code storagePartition} must be registered. */
  public static PartitionEvent newPartitionAddEvent(String storagePartition) {
    return new PartitionEvent(PartitionEventType.ADD, storagePartition);
  }

  /** Event signalling that {@code storagePartition}'s metadata must be refreshed. */
  public static PartitionEvent newPartitionUpdateEvent(String storagePartition) {
    return new PartitionEvent(PartitionEventType.UPDATE, storagePartition);
  }

  /** Event signalling that {@code storagePartition} must be removed. */
  public static PartitionEvent newPartitionDropEvent(String storagePartition) {
    return new PartitionEvent(PartitionEventType.DROP, storagePartition);
  }
}

View File

@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.sync.common.model;
import java.io.Serializable;
import java.util.List;
/**
 * Extracts the partition-column values encoded in a storage path.
 *
 * <p>A storage path embeds the values of the keys the table is partitioned
 * on, but the mapping is not straightforward, so a pluggable implementation
 * is required to recover the values from the path.
 *
 * <p>e.g. a Hive table partitioned by {@code datestr=yyyy-mm-dd} with storage
 * path {@code /app/hoodie/dataset1/YYYY=[yyyy]/MM=[mm]/DD=[dd]}.
 *
 * <p>Implementations must be {@link Serializable} — presumably so they can be
 * shipped to distributed sync tasks; confirm against callers.
 */
public interface PartitionValueExtractor extends Serializable {
  /**
   * Parses a partition path into its ordered partition-column values.
   *
   * @param partitionPath the partition's path relative to the table base path
   * @return the extracted values, one per partition key, in key order
   */
  List<String> extractPartitionValuesInPath(String partitionPath);
}

View File

@@ -18,9 +18,13 @@
package org.apache.hudi.sync.common.util;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hadoop.conf.Configuration;
import java.util.HashMap;
import java.util.Map;
import org.apache.hudi.common.util.StringUtils;
import java.util.Properties;
public class ConfigUtils {
/**
@@ -32,6 +36,7 @@ public class ConfigUtils {
/**
* Convert the key-value config to a map.The format of the config
* is a key-value pair just like "k1=v1\nk2=v2\nk3=v3".
*
* @param keyValueConfig
* @return
*/
@@ -49,7 +54,7 @@ public class ConfigUtils {
tableProperties.put(key, value);
} else {
throw new IllegalArgumentException("Bad key-value config: " + keyValue + ", must be the"
+ " format 'key = value'");
+ " format 'key = value'");
}
}
return tableProperties;
@@ -58,6 +63,7 @@ public class ConfigUtils {
/**
* Convert map config to key-value string.The format of the config
* is a key-value pair just like "k1=v1\nk2=v2\nk3=v3".
*
* @param config
* @return
*/
@@ -75,4 +81,10 @@ public class ConfigUtils {
return sb.toString();
}
public static Configuration createHadoopConf(Properties props) {
Configuration hadoopConf = new Configuration();
props.stringPropertyNames().forEach(k -> hadoopConf.set(k, props.getProperty(k)));
return hadoopConf;
}
}

View File

@@ -1,12 +1,13 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -15,15 +16,10 @@
* limitations under the License.
*/
package org.apache.hudi.sync.common;
package org.apache.hudi.sync.common.util;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.sync.common.util.ConfigUtils;
import org.apache.hudi.sync.common.util.Parquet2SparkSchemaUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
@@ -33,40 +29,18 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import static org.apache.parquet.schema.OriginalType.UTF8;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
/**
* Base class to sync Hudi meta data with Metastores to make
* Hudi table queryable through external systems.
*/
public abstract class AbstractSyncTool {
protected final Configuration conf;
protected final FileSystem fs;
protected TypedProperties props;
public AbstractSyncTool(TypedProperties props, Configuration conf, FileSystem fs) {
this.props = props;
this.conf = conf;
this.fs = fs;
}
@Deprecated
public AbstractSyncTool(Properties props, FileSystem fileSystem) {
this(new TypedProperties(props), fileSystem.getConf(), fileSystem);
}
public abstract void syncHoodieTable();
public class SparkDataSourceTableUtils {
/**
* Get Spark Sql related table properties. This is used for spark datasource table.
* @param schema The schema to write to the table.
* @return A new parameters added the spark's table properties.
*/
protected Map<String, String> getSparkTableProperties(List<String> partitionNames, String sparkVersion,
int schemaLengthThreshold, MessageType schema) {
public static Map<String, String> getSparkTableProperties(List<String> partitionNames, String sparkVersion,
int schemaLengthThreshold, MessageType schema) {
// Convert the schema and partition info used by spark sql to hive table properties.
// The following code refers to the spark code in
// https://github.com/apache/spark/blob/master/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -122,7 +96,7 @@ public abstract class AbstractSyncTool {
return sparkProperties;
}
protected Map<String, String> getSparkSerdeProperties(boolean readAsOptimized, String basePath) {
public static Map<String, String> getSparkSerdeProperties(boolean readAsOptimized, String basePath) {
Map<String, String> sparkSerdeProperties = new HashMap<>();
sparkSerdeProperties.put("path", basePath);
sparkSerdeProperties.put(ConfigUtils.IS_QUERY_AS_RO_TABLE, String.valueOf(readAsOptimized));

View File

@@ -22,13 +22,11 @@ package org.apache.hudi.sync.common.util;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.sync.common.AbstractSyncTool;
import org.apache.hudi.sync.common.HoodieSyncConfig;
import org.apache.hudi.sync.common.HoodieSyncTool;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.util.Properties;
@@ -36,13 +34,12 @@ import java.util.Properties;
* Helper class for syncing Hudi commit data with external metastores.
*/
public class SyncUtilHelpers {
private static final Logger LOG = LogManager.getLogger(SyncUtilHelpers.class);
/**
* Create an instance of an implementation of {@link AbstractSyncTool} that will sync all the relevant meta information
* Create an instance of an implementation of {@link HoodieSyncTool} that will sync all the relevant meta information
* with an external metastore such as Hive etc. to ensure Hoodie tables can be queried or read via external systems.
*
* @param metaSyncFQCN The class that implements the sync of the metadata.
* @param metaSyncFQCN The class that implements the sync of the metadata.
* @param props property map.
* @param hadoopConfig Hadoop confs.
* @param fs Filesystem used.
@@ -62,30 +59,40 @@ public class SyncUtilHelpers {
}
}
static AbstractSyncTool instantiateMetaSyncTool(String metaSyncFQCN,
TypedProperties props,
Configuration hadoopConfig,
FileSystem fs,
String targetBasePath,
String baseFileFormat) {
static HoodieSyncTool instantiateMetaSyncTool(String metaSyncFQCN,
TypedProperties props,
Configuration hadoopConfig,
FileSystem fs,
String targetBasePath,
String baseFileFormat) {
TypedProperties properties = new TypedProperties();
properties.putAll(props);
properties.put(HoodieSyncConfig.META_SYNC_BASE_PATH.key(), targetBasePath);
properties.put(HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT.key(), baseFileFormat);
if (ReflectionUtils.hasConstructor(metaSyncFQCN,
new Class<?>[] {Properties.class, Configuration.class})) {
return ((HoodieSyncTool) ReflectionUtils.loadClass(metaSyncFQCN,
new Class<?>[] {Properties.class, Configuration.class},
properties, hadoopConfig));
} else if (ReflectionUtils.hasConstructor(metaSyncFQCN,
new Class<?>[] {Properties.class})) {
return ((HoodieSyncTool) ReflectionUtils.loadClass(metaSyncFQCN,
new Class<?>[] {Properties.class},
properties));
} else if (ReflectionUtils.hasConstructor(metaSyncFQCN,
new Class<?>[] {TypedProperties.class, Configuration.class, FileSystem.class})) {
return ((AbstractSyncTool) ReflectionUtils.loadClass(metaSyncFQCN,
return ((HoodieSyncTool) ReflectionUtils.loadClass(metaSyncFQCN,
new Class<?>[] {TypedProperties.class, Configuration.class, FileSystem.class},
properties, hadoopConfig, fs));
} else if (ReflectionUtils.hasConstructor(metaSyncFQCN,
new Class<?>[] {Properties.class, FileSystem.class})) {
return ((HoodieSyncTool) ReflectionUtils.loadClass(metaSyncFQCN,
new Class<?>[] {Properties.class, FileSystem.class},
properties, fs));
} else {
LOG.warn("Falling back to deprecated constructor for class: " + metaSyncFQCN);
try {
return ((AbstractSyncTool) ReflectionUtils.loadClass(metaSyncFQCN,
new Class<?>[] {Properties.class, FileSystem.class}, properties, fs));
} catch (Throwable t) {
throw new HoodieException("Could not load meta sync class " + metaSyncFQCN, t);
}
throw new HoodieException("Could not load meta sync class " + metaSyncFQCN
+ ": no valid constructor found.");
}
}
}

View File

@@ -20,16 +20,19 @@ package org.apache.hudi.sync.common.util;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.sync.common.AbstractSyncTool;
import org.apache.hudi.sync.common.HoodieSyncTool;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import java.io.IOException;
import java.util.Properties;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -46,42 +49,44 @@ public class TestSyncUtilHelpers {
hadoopConf = fileSystem.getConf();
}
@Test
public void testCreateValidSyncClass() {
AbstractSyncTool metaSyncTool = SyncUtilHelpers.instantiateMetaSyncTool(
ValidMetaSyncClass.class.getName(),
@ParameterizedTest
@ValueSource(classes = {DummySyncTool1.class, DummySyncTool2.class})
public void testCreateValidSyncClass(Class<?> clazz) {
HoodieSyncTool syncTool = SyncUtilHelpers.instantiateMetaSyncTool(
clazz.getName(),
new TypedProperties(),
hadoopConf,
fileSystem,
BASE_PATH,
BASE_FORMAT
);
assertTrue(metaSyncTool instanceof ValidMetaSyncClass);
assertTrue(clazz.isAssignableFrom(syncTool.getClass()));
}
/**
* Ensure it still works for the deprecated constructor of {@link AbstractSyncTool}
* Ensure it still works for the deprecated constructor of {@link HoodieSyncTool}
* as we implemented the fallback.
*/
@Test
public void testCreateDeprecatedSyncClass() {
@ParameterizedTest
@ValueSource(classes = {DeprecatedSyncTool1.class, DeprecatedSyncTool2.class})
public void testCreateDeprecatedSyncClass(Class<?> clazz) {
Properties properties = new Properties();
AbstractSyncTool deprecatedMetaSyncClass = SyncUtilHelpers.instantiateMetaSyncTool(
DeprecatedMetaSyncClass.class.getName(),
HoodieSyncTool syncTool = SyncUtilHelpers.instantiateMetaSyncTool(
clazz.getName(),
new TypedProperties(properties),
hadoopConf,
fileSystem,
BASE_PATH,
BASE_FORMAT
);
assertTrue(deprecatedMetaSyncClass instanceof DeprecatedMetaSyncClass);
assertTrue(clazz.isAssignableFrom(syncTool.getClass()));
}
@Test
public void testCreateInvalidSyncClass() {
Exception exception = assertThrows(HoodieException.class, () -> {
Throwable t = assertThrows(HoodieException.class, () -> {
SyncUtilHelpers.instantiateMetaSyncTool(
InvalidSyncClass.class.getName(),
InvalidSyncTool.class.getName(),
new TypedProperties(),
hadoopConf,
fileSystem,
@@ -90,14 +95,14 @@ public class TestSyncUtilHelpers {
);
});
String expectedMessage = "Could not load meta sync class " + InvalidSyncClass.class.getName();
assertTrue(exception.getMessage().contains(expectedMessage));
String expectedMessage = "Could not load meta sync class " + InvalidSyncTool.class.getName()
+ ": no valid constructor found.";
assertEquals(expectedMessage, t.getMessage());
}
public static class ValidMetaSyncClass extends AbstractSyncTool {
public ValidMetaSyncClass(TypedProperties props, Configuration conf, FileSystem fs) {
super(props, conf, fs);
public static class DummySyncTool1 extends HoodieSyncTool {
public DummySyncTool1(Properties props, Configuration hadoopConf) {
super(props, hadoopConf);
}
@Override
@@ -106,9 +111,9 @@ public class TestSyncUtilHelpers {
}
}
public static class DeprecatedMetaSyncClass extends AbstractSyncTool {
public DeprecatedMetaSyncClass(Properties props, FileSystem fileSystem) {
super(props, fileSystem);
public static class DummySyncTool2 extends HoodieSyncTool {
public DummySyncTool2(Properties props, Configuration hadoopConf) {
super(props, hadoopConf);
}
@Override
@@ -117,8 +122,30 @@ public class TestSyncUtilHelpers {
}
}
public static class InvalidSyncClass {
public InvalidSyncClass(Properties props) {
public static class DeprecatedSyncTool1 extends HoodieSyncTool {
public DeprecatedSyncTool1(TypedProperties props, Configuration hadoopConf, FileSystem fs) {
super(props, hadoopConf, fs);
}
@Override
public void syncHoodieTable() {
throw new HoodieException("Method unimplemented as its a test class");
}
}
public static class DeprecatedSyncTool2 extends HoodieSyncTool {
public DeprecatedSyncTool2(Properties props, FileSystem fs) {
super(props, fs);
}
@Override
public void syncHoodieTable() {
throw new HoodieException("Method unimplemented as its a test class");
}
}
public static class InvalidSyncTool {
public InvalidSyncTool(Properties props, FileSystem fs, Configuration hadoopConf) {
}
}
}