1
0

[HUDI-1190] Introduce @PublicAPIClass and @PublicAPIMethod annotations to mark public APIs (#1965)

- Maturity levels one of : evolving, stable, deprecated
- Took a pass and marked out most of the existing public API
This commit is contained in:
vinoth chandar
2020-08-13 23:28:17 -07:00
committed by GitHub
parent 379cf0786f
commit 9bde6d616c
11 changed files with 171 additions and 0 deletions

View File

@@ -18,6 +18,9 @@
package org.apache.hudi.index;
import org.apache.hudi.ApiMaturityLevel;
import org.apache.hudi.PublicAPIClass;
import org.apache.hudi.PublicAPIMethod;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieKey;
@@ -45,6 +48,7 @@ import java.io.Serializable;
/**
* Base class for different types of indexes to determine the mapping from uuid.
*/
@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING)
public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Serializable {
protected final HoodieWriteConfig config;
@@ -85,6 +89,7 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
* Checks if the given [Keys] exists in the hoodie table and returns [Key, Option[partitionPath, fileID]] If the
* optional is empty, then the key is not found.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public abstract JavaPairRDD<HoodieKey, Option<Pair<String, String>>> fetchRecordLocation(
JavaRDD<HoodieKey> hoodieKeys, final JavaSparkContext jsc, HoodieTable<T> hoodieTable);
@@ -92,6 +97,7 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
* Looks up the index and tags each incoming record with a location of a file that contains the row (if it is actually
* present).
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public abstract JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD, JavaSparkContext jsc,
HoodieTable<T> hoodieTable) throws HoodieIndexException;
@@ -100,12 +106,14 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
* <p>
* TODO(vc): We may need to propagate the record as well in a WriteStatus class
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public abstract JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD, JavaSparkContext jsc,
HoodieTable<T> hoodieTable) throws HoodieIndexException;
/**
* Rollback the efffects of the commit made at instantTime.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public abstract boolean rollbackCommit(String instantTime);
/**
@@ -115,6 +123,7 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
*
* @return whether or not, the index implementation is global in nature
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public abstract boolean isGlobal();
/**
@@ -123,12 +132,14 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
*
* @return Returns true/false depending on whether the impl has this capability
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
public abstract boolean canIndexLogFiles();
/**
* An index is "implicit" with respect to storage, if just writing new data to a file slice, updates the index as
* well. This is used by storage, to save memory footprint in certain cases.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public abstract boolean isImplicitWithStorage();
/**

View File

@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi;
/**
* Indicates how stable a given API method/class is, so user's can plan and set their expectations accordingly.
*/
public enum ApiMaturityLevel {
/**
* New APIs start out in this state. Although enough thought will be given to avoid
* breaking changes to the API in the future, sometimes it might need to change
* based on feedback.
*/
EVOLVING,
/**
* Enough applications/users have picked up the API and we deem it stable. We will strive to never
* break the stability of such APIs within a given major version release.
*/
STABLE,
/**
* New things are born, old things fade away. This holds true for APIs also. Once an API has been
* marked as deprecated, new APIs replacing them (if need be) would be in stable state for users
* to migrate to.
*/
DEPRECATED
}

View File

@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi;
import java.lang.annotation.Documented;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Annotates a type as a user facing class.
*/
@Inherited
@Documented
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.CLASS)
public @interface PublicAPIClass {
ApiMaturityLevel maturity();
}

View File

@@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi;
import java.lang.annotation.Documented;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Annotates a method, as part of the public contract with user code.
*/
@Inherited
@Documented
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.CLASS)
public @interface PublicAPIMethod {
ApiMaturityLevel maturity();
}

View File

@@ -18,6 +18,9 @@
package org.apache.hudi.common.model;
import org.apache.hudi.ApiMaturityLevel;
import org.apache.hudi.PublicAPIClass;
import org.apache.hudi.PublicAPIMethod;
import org.apache.hudi.common.util.Option;
import org.apache.avro.Schema;
@@ -31,12 +34,14 @@ import java.util.Map;
* Every Hoodie table has an implementation of the <code>HoodieRecordPayload</code> This abstracts out callbacks which
* depend on record specific logic.
*/
@PublicAPIClass(maturity = ApiMaturityLevel.STABLE)
public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Serializable {
/**
* When more than one HoodieRecord have the same HoodieKey, this function combines them before attempting to
* insert/upsert (if combining turned on in HoodieClientConfig).
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
T preCombine(T another);
/**
@@ -50,6 +55,7 @@ public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Seri
* @param schema Schema used for record
* @return new combined/merged value to be written back to storage. EMPTY to skip writing this record.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException;
/**
@@ -57,6 +63,7 @@ public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Seri
* new value for the given HoodieKey, wherein there is no existing record in storage to be combined against. (i.e
* insert) Return EMPTY to skip writing this record.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
Option<IndexedRecord> getInsertValue(Schema schema) throws IOException;
/**
@@ -64,6 +71,7 @@ public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Seri
* {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to compute some aggregate metrics
* using the metadata in the context of a write success or failure.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
default Option<Map<String, String>> getMetadata() {
return Option.empty();
}

View File

@@ -33,12 +33,14 @@ import java.util.stream.Collectors;
/**
* List of helpers to aid, construction of instanttime for read and write operations using datasource.
*/
@PublicAPIClass(maturity = ApiMaturityLevel.STABLE)
public class HoodieDataSourceHelpers {
/**
* Checks if the Hoodie table has new data since given timestamp. This can be subsequently fed to an incremental
* view read, to perform incremental processing.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public static boolean hasNewCommits(FileSystem fs, String basePath, String commitTimestamp) {
return listCommitsSince(fs, basePath, commitTimestamp).size() > 0;
}
@@ -46,6 +48,7 @@ public class HoodieDataSourceHelpers {
/**
* Get a list of instant times that have occurred, from the given instant timestamp.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public static List<String> listCommitsSince(FileSystem fs, String basePath, String instantTimestamp) {
HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath);
return timeline.findInstantsAfter(instantTimestamp, Integer.MAX_VALUE).getInstants()
@@ -55,6 +58,7 @@ public class HoodieDataSourceHelpers {
/**
* Returns the last successful write operation's instant time.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public static String latestCommit(FileSystem fs, String basePath) {
HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath);
return timeline.lastInstant().get().getTimestamp();
@@ -64,6 +68,7 @@ public class HoodieDataSourceHelpers {
* Obtain all the commits, compactions that have occurred on the timeline, whose instant times could be fed into the
* datasource options.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
if (metaClient.getTableType().equals(HoodieTableType.MERGE_ON_READ)) {

View File

@@ -18,7 +18,10 @@
package org.apache.hudi.keygen;
import org.apache.hudi.ApiMaturityLevel;
import org.apache.hudi.AvroConversionHelper;
import org.apache.hudi.PublicAPIClass;
import org.apache.hudi.PublicAPIMethod;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieKey;
@@ -32,6 +35,7 @@ import java.util.List;
/**
* Abstract class to extend for plugging in extraction of {@link HoodieKey} from an Avro record.
*/
@PublicAPIClass(maturity = ApiMaturityLevel.STABLE)
public abstract class KeyGenerator implements Serializable, KeyGeneratorInterface {
private static final String STRUCT_NAME = "hoodieRowTopLevelField";
@@ -47,6 +51,7 @@ public abstract class KeyGenerator implements Serializable, KeyGeneratorInterfac
/**
* Generate a Hoodie Key out of provided generic record.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public abstract HoodieKey getKey(GenericRecord record);
/**
@@ -54,6 +59,7 @@ public abstract class KeyGenerator implements Serializable, KeyGeneratorInterfac
*
* @return list of field names, when concatenated make up the record key.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
public List<String> getRecordKeyFieldNames() {
throw new UnsupportedOperationException("Bootstrap not supported for key generator. "
+ "Please override this method in your custom key generator.");
@@ -64,6 +70,7 @@ public abstract class KeyGenerator implements Serializable, KeyGeneratorInterfac
* @param row instance of {@link Row} from which record key is requested.
* @return the record key of interest from {@link Row}.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
public String getRecordKey(Row row) {
if (null == converterFn) {
converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE);
@@ -77,6 +84,7 @@ public abstract class KeyGenerator implements Serializable, KeyGeneratorInterfac
* @param row instance of {@link Row} from which partition path is requested
* @return the partition path of interest from {@link Row}.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
public String getPartitionPath(Row row) {
if (null == converterFn) {
converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE);

View File

@@ -22,12 +22,17 @@ import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.ApiMaturityLevel;
import org.apache.hudi.PublicAPIClass;
import org.apache.hudi.PublicAPIMethod;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.exception.HoodieException;
/**
* Provide the initial checkpoint for delta streamer.
*/
@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING)
public abstract class InitialCheckPointProvider {
protected transient Path path;
protected transient FileSystem fs;
@@ -62,5 +67,6 @@ public abstract class InitialCheckPointProvider {
/**
* Get checkpoint string recognizable for delta streamer.
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
public abstract String getCheckpoint() throws HoodieException;
}

View File

@@ -18,6 +18,9 @@
package org.apache.hudi.utilities.schema;
import org.apache.hudi.ApiMaturityLevel;
import org.apache.hudi.PublicAPIClass;
import org.apache.hudi.PublicAPIMethod;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.avro.Schema;
@@ -28,6 +31,7 @@ import java.io.Serializable;
/**
* Class to provide schema for reading data and also writing into a Hoodie table.
*/
@PublicAPIClass(maturity = ApiMaturityLevel.STABLE)
public abstract class SchemaProvider implements Serializable {
protected TypedProperties config;
@@ -39,8 +43,10 @@ public abstract class SchemaProvider implements Serializable {
this.jssc = jssc;
}
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public abstract Schema getSourceSchema();
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public Schema getTargetSchema() {
// by default, use source schema as target for hoodie table as well
return getSourceSchema();

View File

@@ -18,6 +18,9 @@
package org.apache.hudi.utilities.sources;
import org.apache.hudi.ApiMaturityLevel;
import org.apache.hudi.PublicAPIClass;
import org.apache.hudi.PublicAPIMethod;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.utilities.schema.SchemaProvider;
@@ -30,6 +33,7 @@ import java.io.Serializable;
/**
* Represents a source from which we can tail data. Assumes a constructor that takes properties.
*/
@PublicAPIClass(maturity = ApiMaturityLevel.STABLE)
public abstract class Source<T> implements Serializable {
public enum SourceType {
@@ -57,6 +61,7 @@ public abstract class Source<T> implements Serializable {
this.sourceType = sourceType;
}
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
protected abstract InputBatch<T> fetchNewData(Option<String> lastCkptStr, long sourceLimit);
/**

View File

@@ -18,6 +18,9 @@
package org.apache.hudi.utilities.transform;
import org.apache.hudi.ApiMaturityLevel;
import org.apache.hudi.PublicAPIClass;
import org.apache.hudi.PublicAPIMethod;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.spark.api.java.JavaSparkContext;
@@ -28,6 +31,7 @@ import org.apache.spark.sql.SparkSession;
/**
* Transform source to target dataset before writing.
*/
@PublicAPIClass(maturity = ApiMaturityLevel.STABLE)
public interface Transformer {
/**
@@ -39,5 +43,6 @@ public interface Transformer {
* @param properties Config properties
* @return Transformed Dataset
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
Dataset apply(JavaSparkContext jsc, SparkSession sparkSession, Dataset<Row> rowDataset, TypedProperties properties);
}