diff --git a/hudi-client/src/main/java/org/apache/hudi/index/HoodieIndex.java b/hudi-client/src/main/java/org/apache/hudi/index/HoodieIndex.java index 03a965a95..4043586dd 100644 --- a/hudi-client/src/main/java/org/apache/hudi/index/HoodieIndex.java +++ b/hudi-client/src/main/java/org/apache/hudi/index/HoodieIndex.java @@ -18,6 +18,9 @@ package org.apache.hudi.index; +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieKey; @@ -45,6 +48,7 @@ import java.io.Serializable; /** * Base class for different types of indexes to determine the mapping from uuid. */ +@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING) public abstract class HoodieIndex implements Serializable { protected final HoodieWriteConfig config; @@ -85,6 +89,7 @@ public abstract class HoodieIndex implements Seri * Checks if the given [Keys] exists in the hoodie table and returns [Key, Option[partitionPath, fileID]] If the * optional is empty, then the key is not found. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public abstract JavaPairRDD>> fetchRecordLocation( JavaRDD hoodieKeys, final JavaSparkContext jsc, HoodieTable hoodieTable); @@ -92,6 +97,7 @@ public abstract class HoodieIndex implements Seri * Looks up the index and tags each incoming record with a location of a file that contains the row (if it is actually * present). */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public abstract JavaRDD> tagLocation(JavaRDD> recordRDD, JavaSparkContext jsc, HoodieTable hoodieTable) throws HoodieIndexException; @@ -100,12 +106,14 @@ public abstract class HoodieIndex implements Seri *

* TODO(vc): We may need to propagate the record as well in a WriteStatus class */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public abstract JavaRDD updateLocation(JavaRDD writeStatusRDD, JavaSparkContext jsc, HoodieTable hoodieTable) throws HoodieIndexException; /** * Rollback the efffects of the commit made at instantTime. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public abstract boolean rollbackCommit(String instantTime); /** @@ -115,6 +123,7 @@ public abstract class HoodieIndex implements Seri * * @return whether or not, the index implementation is global in nature */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public abstract boolean isGlobal(); /** @@ -123,12 +132,14 @@ public abstract class HoodieIndex implements Seri * * @return Returns true/false depending on whether the impl has this capability */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public abstract boolean canIndexLogFiles(); /** * An index is "implicit" with respect to storage, if just writing new data to a file slice, updates the index as * well. This is used by storage, to save memory footprint in certain cases. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public abstract boolean isImplicitWithStorage(); /** diff --git a/hudi-common/src/main/java/org/apache/hudi/ApiMaturityLevel.java b/hudi-common/src/main/java/org/apache/hudi/ApiMaturityLevel.java new file mode 100644 index 000000000..a1c63075b --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/ApiMaturityLevel.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi; + +/** + * Indicates how stable a given API method/class is, so users can plan and set their expectations accordingly. + */ +public enum ApiMaturityLevel { + /** + * New APIs start out in this state. Although enough thought will be given to avoid + * breaking changes to the API in the future, sometimes it might need to change + * based on feedback. + */ + EVOLVING, + /** + * Enough applications/users have picked up the API and we deem it stable. We will strive to never + * break the stability of such APIs within a given major version release. + */ + STABLE, + /** + * New things are born, old things fade away. This holds true for APIs also. Once an API has been + * marked as deprecated, new APIs replacing them (if need be) would be in stable state for users + * to migrate to. + */ + DEPRECATED +} diff --git a/hudi-common/src/main/java/org/apache/hudi/PublicAPIClass.java b/hudi-common/src/main/java/org/apache/hudi/PublicAPIClass.java new file mode 100644 index 000000000..723018325 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/PublicAPIClass.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Inherited; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotates a type as a user facing class. + */ +@Inherited +@Documented +@Target(ElementType.TYPE) +@Retention(RetentionPolicy.CLASS) +public @interface PublicAPIClass { + ApiMaturityLevel maturity(); +} diff --git a/hudi-common/src/main/java/org/apache/hudi/PublicAPIMethod.java b/hudi-common/src/main/java/org/apache/hudi/PublicAPIMethod.java new file mode 100644 index 000000000..2c4ce8607 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/PublicAPIMethod.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Inherited; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotates a method, as part of the public contract with user code. + */ +@Inherited +@Documented +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.CLASS) +public @interface PublicAPIMethod { + ApiMaturityLevel maturity(); +} + diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java index d744e62b9..1afdd1b59 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java @@ -18,6 +18,9 @@ package org.apache.hudi.common.model; +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; import org.apache.hudi.common.util.Option; import org.apache.avro.Schema; @@ -31,12 +34,14 @@ import java.util.Map; * Every Hoodie table has an implementation of the HoodieRecordPayload This abstracts out callbacks which * depend on record specific logic. */ +@PublicAPIClass(maturity = ApiMaturityLevel.STABLE) public interface HoodieRecordPayload extends Serializable { /** * When more than one HoodieRecord have the same HoodieKey, this function combines them before attempting to * insert/upsert (if combining turned on in HoodieClientConfig). 
*/ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) T preCombine(T another); /** @@ -50,6 +55,7 @@ public interface HoodieRecordPayload extends Seri * @param schema Schema used for record * @return new combined/merged value to be written back to storage. EMPTY to skip writing this record. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) Option combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException; /** @@ -57,6 +63,7 @@ public interface HoodieRecordPayload extends Seri * new value for the given HoodieKey, wherein there is no existing record in storage to be combined against. (i.e * insert) Return EMPTY to skip writing this record. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) Option getInsertValue(Schema schema) throws IOException; /** @@ -64,6 +71,7 @@ public interface HoodieRecordPayload extends Seri * {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to compute some aggregate metrics * using the metadata in the context of a write success or failure. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) default Option> getMetadata() { return Option.empty(); } diff --git a/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java b/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java index 8d8e7ba20..fccc65d66 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java +++ b/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java @@ -33,12 +33,14 @@ import java.util.stream.Collectors; /** * List of helpers to aid, construction of instanttime for read and write operations using datasource. */ +@PublicAPIClass(maturity = ApiMaturityLevel.STABLE) public class HoodieDataSourceHelpers { /** * Checks if the Hoodie table has new data since given timestamp. This can be subsequently fed to an incremental * view read, to perform incremental processing. 
*/ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public static boolean hasNewCommits(FileSystem fs, String basePath, String commitTimestamp) { return listCommitsSince(fs, basePath, commitTimestamp).size() > 0; } @@ -46,6 +48,7 @@ public class HoodieDataSourceHelpers { /** * Get a list of instant times that have occurred, from the given instant timestamp. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public static List listCommitsSince(FileSystem fs, String basePath, String instantTimestamp) { HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath); return timeline.findInstantsAfter(instantTimestamp, Integer.MAX_VALUE).getInstants() @@ -55,6 +58,7 @@ public class HoodieDataSourceHelpers { /** * Returns the last successful write operation's instant time. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public static String latestCommit(FileSystem fs, String basePath) { HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath); return timeline.lastInstant().get().getTimestamp(); @@ -64,6 +68,7 @@ public class HoodieDataSourceHelpers { * Obtain all the commits, compactions that have occurred on the timeline, whose instant times could be fed into the * datasource options. 
*/ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) { HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true); if (metaClient.getTableType().equals(HoodieTableType.MERGE_ON_READ)) { diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenerator.java index a11bc8492..26efa5eed 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenerator.java +++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenerator.java @@ -18,7 +18,10 @@ package org.apache.hudi.keygen; +import org.apache.hudi.ApiMaturityLevel; import org.apache.hudi.AvroConversionHelper; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieKey; @@ -32,6 +35,7 @@ import java.util.List; /** * Abstract class to extend for plugging in extraction of {@link HoodieKey} from an Avro record. */ +@PublicAPIClass(maturity = ApiMaturityLevel.STABLE) public abstract class KeyGenerator implements Serializable, KeyGeneratorInterface { private static final String STRUCT_NAME = "hoodieRowTopLevelField"; @@ -47,6 +51,7 @@ public abstract class KeyGenerator implements Serializable, KeyGeneratorInterfac /** * Generate a Hoodie Key out of provided generic record. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public abstract HoodieKey getKey(GenericRecord record); /** @@ -54,6 +59,7 @@ public abstract class KeyGenerator implements Serializable, KeyGeneratorInterfac * * @return list of field names, when concatenated make up the record key. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public List getRecordKeyFieldNames() { throw new UnsupportedOperationException("Bootstrap not supported for key generator. 
" + "Please override this method in your custom key generator."); @@ -64,6 +70,7 @@ public abstract class KeyGenerator implements Serializable, KeyGeneratorInterfac * @param row instance of {@link Row} from which record key is requested. * @return the record key of interest from {@link Row}. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public String getRecordKey(Row row) { if (null == converterFn) { converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE); @@ -77,6 +84,7 @@ public abstract class KeyGenerator implements Serializable, KeyGeneratorInterfac * @param row instance of {@link Row} from which partition path is requested * @return the partition path of interest from {@link Row}. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public String getPartitionPath(Row row) { if (null == converterFn) { converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckPointProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckPointProvider.java index 7fc8afb54..4b9039e43 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckPointProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckPointProvider.java @@ -22,12 +22,17 @@ import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; + +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.exception.HoodieException; /** * Provide the initial checkpoint for delta streamer. 
*/ +@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING) public abstract class InitialCheckPointProvider { protected transient Path path; protected transient FileSystem fs; @@ -62,5 +67,6 @@ public abstract class InitialCheckPointProvider { /** * Get checkpoint string recognizable for delta streamer. */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public abstract String getCheckpoint() throws HoodieException; } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java index b90f07d92..c653622b9 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java @@ -18,6 +18,9 @@ package org.apache.hudi.utilities.schema; +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; import org.apache.hudi.common.config.TypedProperties; import org.apache.avro.Schema; @@ -28,6 +31,7 @@ import java.io.Serializable; /** * Class to provide schema for reading data and also writing into a Hoodie table. 
*/ +@PublicAPIClass(maturity = ApiMaturityLevel.STABLE) public abstract class SchemaProvider implements Serializable { protected TypedProperties config; @@ -39,8 +43,10 @@ public abstract class SchemaProvider implements Serializable { this.jssc = jssc; } + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public abstract Schema getSourceSchema(); + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) public Schema getTargetSchema() { // by default, use source schema as target for hoodie table as well return getSourceSchema(); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java index 6e51442a4..4d25d479a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java @@ -18,6 +18,9 @@ package org.apache.hudi.utilities.sources; +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.util.Option; import org.apache.hudi.utilities.schema.SchemaProvider; @@ -30,6 +33,7 @@ import java.io.Serializable; /** * Represents a source from which we can tail data. Assumes a constructor that takes properties. 
*/ +@PublicAPIClass(maturity = ApiMaturityLevel.STABLE) public abstract class Source implements Serializable { public enum SourceType { @@ -57,6 +61,7 @@ public abstract class Source implements Serializable { this.sourceType = sourceType; } + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) protected abstract InputBatch fetchNewData(Option lastCkptStr, long sourceLimit); /** diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java index 60278d758..8d653d84d 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java @@ -18,6 +18,9 @@ package org.apache.hudi.utilities.transform; +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; import org.apache.hudi.common.config.TypedProperties; import org.apache.spark.api.java.JavaSparkContext; @@ -28,6 +31,7 @@ import org.apache.spark.sql.SparkSession; /** * Transform source to target dataset before writing. */ +@PublicAPIClass(maturity = ApiMaturityLevel.STABLE) public interface Transformer { /** @@ -39,5 +43,6 @@ public interface Transformer { * @param properties Config properties * @return Transformed Dataset */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) Dataset apply(JavaSparkContext jsc, SparkSession sparkSession, Dataset rowDataset, TypedProperties properties); }