[HUDI-3563] Make quickstart examples covered by CI tests (#5082)
This commit is contained in:
109
hudi-examples/hudi-examples-common/pom.xml
Normal file
109
hudi-examples/hudi-examples-common/pom.xml
Normal file
@@ -0,0 +1,109 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>hudi-examples</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<version>0.11.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>hudi-examples-common</artifactId>
|
||||
|
||||
<properties>
|
||||
<main.basedir>${project.parent.basedir}</main.basedir>
|
||||
<checkstyle.skip>true</checkstyle.skip>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>src/main/resources</directory>
|
||||
</resource>
|
||||
</resources>
|
||||
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
<phase>process-resources</phase>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>compile</phase>
|
||||
<goals>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>test-jar</goal>
|
||||
</goals>
|
||||
<phase>test-compile</phase>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<skip>false</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.rat</groupId>
|
||||
<artifactId>apache-rat-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Avro -->
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
@@ -43,7 +43,6 @@ import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
|
||||
/**
|
||||
* Class to be used to generate test data.
|
||||
*/
|
||||
@@ -63,7 +62,7 @@ public class HoodieExampleDataGenerator<T extends HoodieRecordPayload<T>> {
|
||||
+ "{\"name\":\"fare\",\"type\": \"double\"}]}";
|
||||
public static Schema avroSchema = new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA);
|
||||
|
||||
private static Random rand = new Random(46474747);
|
||||
private static final Random rand = new Random(46474747);
|
||||
|
||||
private final Map<Integer, KeyPartition> existingKeys;
|
||||
private final String[] partitionPaths;
|
||||
364
hudi-examples/hudi-examples-flink/pom.xml
Normal file
364
hudi-examples/hudi-examples-flink/pom.xml
Normal file
@@ -0,0 +1,364 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>hudi-examples</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<version>0.11.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>hudi-examples-flink</artifactId>
|
||||
|
||||
<properties>
|
||||
<main.basedir>${project.parent.basedir}</main.basedir>
|
||||
<checkstyle.skip>true</checkstyle.skip>
|
||||
<parquet.version>1.11.1</parquet.version>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.jacoco</groupId>
|
||||
<artifactId>jacoco-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<version>3.1.2</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>test-jar</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.rat</groupId>
|
||||
<artifactId>apache-rat-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>src/main/resources</directory>
|
||||
</resource>
|
||||
<resource>
|
||||
<directory>src/test/resources</directory>
|
||||
</resource>
|
||||
</resources>
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-flink-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-mr</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive-sync</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-sync-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-flink</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Flink -->
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-clients_${scala.binary.version}</artifactId>
|
||||
<scope>compile</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.esotericsoftware.kryo</groupId>
|
||||
<artifactId>kryo</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>com.esotericsoftware.minlog</groupId>
|
||||
<artifactId>minlog</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.kafka</groupId>
|
||||
<artifactId>kafka-clients</artifactId>
|
||||
<version>${kafka.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-hadoop-compatibility_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-parquet_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-json</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-table-common</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-table-runtime_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-table-planner_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hadoop</artifactId>
|
||||
<version>${parquet.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.xerial.snappy</groupId>
|
||||
<artifactId>snappy-java</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Avro -->
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
<!-- Override the version to be same with Flink avro -->
|
||||
<version>1.10.0</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
||||
<scope>compile</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>bijection-avro_${scala.binary.version}</artifactId>
|
||||
<version>0.9.7</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
<version>2.5</version>
|
||||
</dependency>
|
||||
<!-- Hive -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-exec</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<classifier>${hive.exec.classifier}</classifier>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.mail</groupId>
|
||||
<artifactId>mail</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Test dependencies -->
|
||||
|
||||
<!-- Junit 5 dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-api</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-engine</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.vintage</groupId>
|
||||
<artifactId>junit-vintage-engine</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-params</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-flink-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>org.apache.hudi</groupId>-->
|
||||
<!-- <artifactId>hudi-flink_${scala.binary.version}</artifactId>-->
|
||||
<!-- <version>${project.version}</version>-->
|
||||
<!-- <scope>test</scope>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- Flink dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-test-utils_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-runtime</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-table-runtime_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-json</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-csv</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet Test-->
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
<version>${parquet.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
@@ -0,0 +1,211 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.examples.quickstart;
|
||||
|
||||
import static org.apache.hudi.examples.quickstart.utils.QuickstartConfigurations.sql;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.core.execution.JobClient;
|
||||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||
import org.apache.flink.table.api.EnvironmentSettings;
|
||||
import org.apache.flink.table.api.TableEnvironment;
|
||||
import org.apache.flink.table.api.TableResult;
|
||||
import org.apache.flink.table.api.TableSchema;
|
||||
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
|
||||
import org.apache.flink.table.api.config.ExecutionConfigOptions;
|
||||
import org.apache.flink.table.api.internal.TableEnvironmentImpl;
|
||||
import org.apache.flink.table.catalog.ObjectPath;
|
||||
import org.apache.flink.table.catalog.exceptions.TableNotExistException;
|
||||
import org.apache.flink.types.Row;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.examples.quickstart.factory.CollectSinkTableFactory;
|
||||
import org.apache.hudi.examples.quickstart.utils.QuickstartConfigurations;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
public final class HoodieFlinkQuickstart {
|
||||
private EnvironmentSettings settings = null;
|
||||
private TableEnvironment streamTableEnv = null;
|
||||
|
||||
private String tableName;
|
||||
|
||||
private HoodieFlinkQuickstart() {
|
||||
}
|
||||
|
||||
public static HoodieFlinkQuickstart instance() {
|
||||
return new HoodieFlinkQuickstart();
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws TableNotExistException, InterruptedException {
|
||||
if (args.length < 3) {
|
||||
System.err.println("Usage: HoodieWriteClientExample <tablePath> <tableName> <tableType>");
|
||||
System.exit(1);
|
||||
}
|
||||
String tablePath = args[0];
|
||||
String tableName = args[1];
|
||||
String tableType = args[2];
|
||||
|
||||
HoodieFlinkQuickstart flinkQuickstart = instance();
|
||||
flinkQuickstart.initEnv();
|
||||
|
||||
// create filesystem table named source
|
||||
flinkQuickstart.createFileSource();
|
||||
|
||||
// create hudi table
|
||||
flinkQuickstart.createHudiTable(tablePath, tableName, HoodieTableType.valueOf(tableType));
|
||||
|
||||
// insert data
|
||||
flinkQuickstart.insertData();
|
||||
|
||||
// query data
|
||||
flinkQuickstart.queryData();
|
||||
|
||||
// update data
|
||||
flinkQuickstart.updateData();
|
||||
}
|
||||
|
||||
public void initEnv() {
|
||||
if (this.streamTableEnv == null) {
|
||||
settings = EnvironmentSettings.newInstance().build();
|
||||
TableEnvironment streamTableEnv = TableEnvironmentImpl.create(settings);
|
||||
streamTableEnv.getConfig().getConfiguration()
|
||||
.setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1);
|
||||
Configuration execConf = streamTableEnv.getConfig().getConfiguration();
|
||||
execConf.setString("execution.checkpointing.interval", "2s");
|
||||
// configure not to retry after failure
|
||||
execConf.setString("restart-strategy", "fixed-delay");
|
||||
execConf.setString("restart-strategy.fixed-delay.attempts", "0");
|
||||
this.streamTableEnv = streamTableEnv;
|
||||
}
|
||||
}
|
||||
|
||||
public TableEnvironment getStreamTableEnv() {
|
||||
return streamTableEnv;
|
||||
}
|
||||
|
||||
public TableEnvironment getBatchTableEnv() {
|
||||
Configuration conf = new Configuration();
|
||||
// for batch upsert use cases: current suggestion is to disable these 2 options,
|
||||
// from 1.14, flink runtime execution mode has switched from streaming
|
||||
// to batch for batch execution mode(before that, both streaming and batch use streaming execution mode),
|
||||
// current batch execution mode has these limitations:
|
||||
//
|
||||
// 1. the keyed stream default to always sort the inputs by key;
|
||||
// 2. the batch state-backend requires the inputs sort by state key
|
||||
//
|
||||
// For our hudi batch pipeline upsert case, we rely on the consuming sequence for index records and data records,
|
||||
// the index records must be loaded first before data records for BucketAssignFunction to keep upsert semantics correct,
|
||||
// so we suggest disabling these 2 options to use streaming state-backend for batch execution mode
|
||||
// to keep the strategy before 1.14.
|
||||
conf.setBoolean("execution.sorted-inputs.enabled", false);
|
||||
conf.setBoolean("execution.batch-state-backend.enabled", false);
|
||||
StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf);
|
||||
settings = EnvironmentSettings.newInstance().inBatchMode().build();
|
||||
TableEnvironment batchTableEnv = StreamTableEnvironment.create(execEnv, settings);
|
||||
batchTableEnv.getConfig().getConfiguration()
|
||||
.setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1);
|
||||
return batchTableEnv;
|
||||
}
|
||||
|
||||
public void createHudiTable(String tablePath, String tableName,
|
||||
HoodieTableType tableType) {
|
||||
this.tableName = tableName;
|
||||
|
||||
// create hudi table
|
||||
String hoodieTableDDL = sql(tableName)
|
||||
.option(FlinkOptions.PATH, tablePath)
|
||||
.option(FlinkOptions.READ_AS_STREAMING, true)
|
||||
.option(FlinkOptions.TABLE_TYPE, tableType)
|
||||
.end();
|
||||
streamTableEnv.executeSql(hoodieTableDDL);
|
||||
}
|
||||
|
||||
public void createFileSource() {
|
||||
// create filesystem table named source
|
||||
String createSource = QuickstartConfigurations.getFileSourceDDL("source");
|
||||
streamTableEnv.executeSql(createSource);
|
||||
}
|
||||
|
||||
@NotNull List<Row> insertData() throws InterruptedException, TableNotExistException {
|
||||
// insert data
|
||||
String insertInto = String.format("insert into %s select * from source", tableName);
|
||||
execInsertSql(streamTableEnv, insertInto);
|
||||
return queryData();
|
||||
}
|
||||
|
||||
List<Row> queryData() throws InterruptedException, TableNotExistException {
|
||||
// query data
|
||||
// reading from the latest commit instance.
|
||||
return execSelectSql(streamTableEnv, String.format("select * from %s", tableName), 10);
|
||||
}
|
||||
|
||||
@NotNull List<Row> updateData() throws InterruptedException, TableNotExistException {
|
||||
// update data
|
||||
String insertInto = String.format("insert into %s select * from source", tableName);
|
||||
execInsertSql(getStreamTableEnv(), insertInto);
|
||||
return queryData();
|
||||
}
|
||||
|
||||
public static void execInsertSql(TableEnvironment tEnv, String insert) {
|
||||
TableResult tableResult = tEnv.executeSql(insert);
|
||||
// wait to finish
|
||||
try {
|
||||
tableResult.getJobClient().get().getJobExecutionResult().get();
|
||||
} catch (InterruptedException | ExecutionException ex) {
|
||||
// ignored
|
||||
}
|
||||
}
|
||||
|
||||
public static List<Row> execSelectSql(TableEnvironment tEnv, String select, long timeout)
|
||||
throws InterruptedException, TableNotExistException {
|
||||
return execSelectSql(tEnv, select, timeout, null);
|
||||
}
|
||||
|
||||
public static List<Row> execSelectSql(TableEnvironment tEnv, String select, long timeout, String sourceTable)
|
||||
throws InterruptedException, TableNotExistException {
|
||||
final String sinkDDL;
|
||||
if (sourceTable != null) {
|
||||
// use the source table schema as the sink schema if the source table was specified, .
|
||||
ObjectPath objectPath = new ObjectPath(tEnv.getCurrentDatabase(), sourceTable);
|
||||
TableSchema schema = tEnv.getCatalog(tEnv.getCurrentCatalog()).get().getTable(objectPath).getSchema();
|
||||
sinkDDL = QuickstartConfigurations.getCollectSinkDDL("sink", schema);
|
||||
} else {
|
||||
sinkDDL = QuickstartConfigurations.getCollectSinkDDL("sink");
|
||||
}
|
||||
return execSelectSql(tEnv, select, sinkDDL, timeout);
|
||||
}
|
||||
|
||||
public static List<Row> execSelectSql(TableEnvironment tEnv, String select, String sinkDDL, long timeout)
|
||||
throws InterruptedException {
|
||||
tEnv.executeSql("DROP TABLE IF EXISTS sink");
|
||||
tEnv.executeSql(sinkDDL);
|
||||
TableResult tableResult = tEnv.executeSql("insert into sink " + select);
|
||||
// wait for the timeout then cancels the job
|
||||
TimeUnit.SECONDS.sleep(timeout);
|
||||
tableResult.getJobClient().ifPresent(JobClient::cancel);
|
||||
tEnv.executeSql("DROP TABLE IF EXISTS sink");
|
||||
return CollectSinkTableFactory.RESULT.values().stream()
|
||||
.flatMap(Collection::stream)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,178 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.examples.quickstart.factory;
|
||||
|
||||
import org.apache.flink.api.common.state.ListState;
|
||||
import org.apache.flink.api.common.state.ListStateDescriptor;
|
||||
import org.apache.flink.api.java.typeutils.RowTypeInfo;
|
||||
import org.apache.flink.configuration.ConfigOption;
|
||||
import org.apache.flink.runtime.state.FunctionInitializationContext;
|
||||
import org.apache.flink.runtime.state.FunctionSnapshotContext;
|
||||
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
|
||||
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
|
||||
import org.apache.flink.table.api.TableSchema;
|
||||
import org.apache.flink.table.connector.ChangelogMode;
|
||||
import org.apache.flink.table.connector.sink.DynamicTableSink;
|
||||
import org.apache.flink.table.connector.sink.SinkFunctionProvider;
|
||||
import org.apache.flink.table.data.RowData;
|
||||
import org.apache.flink.table.factories.DynamicTableSinkFactory;
|
||||
import org.apache.flink.table.factories.FactoryUtil;
|
||||
import org.apache.flink.table.types.DataType;
|
||||
import org.apache.flink.table.types.utils.TypeConversions;
|
||||
import org.apache.flink.types.Row;
|
||||
import org.apache.flink.types.RowKind;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Factory for CollectTableSink.
|
||||
*
|
||||
* <p>Note: The CollectTableSink collects all the data of a table into a global collection {@code RESULT},
|
||||
* so the tests should executed in single thread and the table name should be the same.
|
||||
*/
|
||||
public class CollectSinkTableFactory implements DynamicTableSinkFactory {
|
||||
public static final String FACTORY_ID = "collect";
|
||||
|
||||
// global results to collect and query
|
||||
public static final Map<Integer, List<Row>> RESULT = new HashMap<>();
|
||||
|
||||
@Override
|
||||
public DynamicTableSink createDynamicTableSink(Context context) {
|
||||
FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
|
||||
helper.validate();
|
||||
|
||||
TableSchema schema = context.getCatalogTable().getSchema();
|
||||
RESULT.clear();
|
||||
return new CollectTableSink(schema, context.getObjectIdentifier().getObjectName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String factoryIdentifier() {
|
||||
return FACTORY_ID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<ConfigOption<?>> requiredOptions() {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<ConfigOption<?>> optionalOptions() {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------
|
||||
// Table sinks
|
||||
// --------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Values {@link DynamicTableSink} for testing.
|
||||
*/
|
||||
private static class CollectTableSink implements DynamicTableSink {
|
||||
|
||||
private final TableSchema schema;
|
||||
private final String tableName;
|
||||
|
||||
private CollectTableSink(
|
||||
TableSchema schema,
|
||||
String tableName) {
|
||||
this.schema = schema;
|
||||
this.tableName = tableName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ChangelogMode getChangelogMode(ChangelogMode requestedMode) {
|
||||
return ChangelogMode.newBuilder()
|
||||
.addContainedKind(RowKind.INSERT)
|
||||
.addContainedKind(RowKind.DELETE)
|
||||
.addContainedKind(RowKind.UPDATE_AFTER)
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
|
||||
final DataType rowType = schema.toPhysicalRowDataType();
|
||||
final RowTypeInfo rowTypeInfo = (RowTypeInfo) TypeConversions.fromDataTypeToLegacyInfo(rowType);
|
||||
DataStructureConverter converter = context.createDataStructureConverter(schema.toPhysicalRowDataType());
|
||||
return SinkFunctionProvider.of(new CollectSinkFunction(converter, rowTypeInfo));
|
||||
}
|
||||
|
||||
@Override
|
||||
public DynamicTableSink copy() {
|
||||
return new CollectTableSink(schema, tableName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String asSummaryString() {
|
||||
return "CollectSink";
|
||||
}
|
||||
}
|
||||
|
||||
static class CollectSinkFunction extends RichSinkFunction<RowData> implements CheckpointedFunction {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final DynamicTableSink.DataStructureConverter converter;
|
||||
private final RowTypeInfo rowTypeInfo;
|
||||
|
||||
protected transient ListState<Row> resultState;
|
||||
protected transient List<Row> localResult;
|
||||
|
||||
private int taskID;
|
||||
|
||||
protected CollectSinkFunction(DynamicTableSink.DataStructureConverter converter, RowTypeInfo rowTypeInfo) {
|
||||
this.converter = converter;
|
||||
this.rowTypeInfo = rowTypeInfo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void invoke(RowData value, Context context) {
|
||||
Row row = (Row) converter.toExternal(value);
|
||||
assert row != null;
|
||||
row.setKind(value.getRowKind());
|
||||
RESULT.get(taskID).add(row);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initializeState(FunctionInitializationContext context) throws Exception {
|
||||
this.resultState = context.getOperatorStateStore().getListState(
|
||||
new ListStateDescriptor<>("sink-results", rowTypeInfo));
|
||||
this.localResult = new ArrayList<>();
|
||||
if (context.isRestored()) {
|
||||
for (Row value : resultState.get()) {
|
||||
localResult.add(value);
|
||||
}
|
||||
}
|
||||
this.taskID = getRuntimeContext().getIndexOfThisSubtask();
|
||||
synchronized (CollectSinkTableFactory.class) {
|
||||
RESULT.put(taskID, localResult);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void snapshotState(FunctionSnapshotContext context) throws Exception {
|
||||
resultState.clear();
|
||||
resultState.addAll(RESULT.get(taskID));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.examples.quickstart.factory;
|
||||
|
||||
import org.apache.flink.configuration.ConfigOption;
|
||||
import org.apache.flink.configuration.ConfigOptions;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.core.fs.Path;
|
||||
import org.apache.flink.table.api.ValidationException;
|
||||
import org.apache.flink.table.connector.source.DynamicTableSource;
|
||||
import org.apache.flink.table.factories.DynamicTableSourceFactory;
|
||||
import org.apache.flink.table.factories.FactoryUtil;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.examples.quickstart.source.ContinuousFileSource;
|
||||
|
||||
/**
|
||||
* Factory for ContinuousFileSource.
|
||||
*/
|
||||
public class ContinuousFileSourceFactory implements DynamicTableSourceFactory {
|
||||
public static final String FACTORY_ID = "continuous-file-source";
|
||||
|
||||
public static final ConfigOption<Integer> CHECKPOINTS = ConfigOptions
|
||||
.key("checkpoints")
|
||||
.intType()
|
||||
.defaultValue(2)
|
||||
.withDescription("Number of checkpoints to write the data set as, default 2");
|
||||
|
||||
@Override
|
||||
public DynamicTableSource createDynamicTableSource(Context context) {
|
||||
FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
|
||||
helper.validate();
|
||||
|
||||
Configuration conf = (Configuration) helper.getOptions();
|
||||
Path path = new Path(conf.getOptional(FlinkOptions.PATH).orElseThrow(() ->
|
||||
new ValidationException("Option [path] should be not empty.")));
|
||||
return new ContinuousFileSource(context.getCatalogTable().getResolvedSchema(), path, conf);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String factoryIdentifier() {
|
||||
return FACTORY_ID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<ConfigOption<?>> requiredOptions() {
|
||||
return Collections.singleton(FlinkOptions.PATH);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<ConfigOption<?>> optionalOptions() {
|
||||
return Collections.singleton(CHECKPOINTS);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.examples.quickstart.source;
|
||||
|
||||
import org.apache.flink.api.common.state.CheckpointListener;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.core.fs.Path;
|
||||
import org.apache.flink.formats.common.TimestampFormat;
|
||||
import org.apache.flink.formats.json.JsonRowDataDeserializationSchema;
|
||||
import org.apache.flink.streaming.api.datastream.DataStream;
|
||||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||
import org.apache.flink.streaming.api.functions.source.SourceFunction;
|
||||
import org.apache.flink.table.catalog.ResolvedSchema;
|
||||
import org.apache.flink.table.connector.ChangelogMode;
|
||||
import org.apache.flink.table.connector.source.DataStreamScanProvider;
|
||||
import org.apache.flink.table.connector.source.DynamicTableSource;
|
||||
import org.apache.flink.table.connector.source.ScanTableSource;
|
||||
import org.apache.flink.table.data.RowData;
|
||||
import org.apache.flink.table.runtime.typeutils.InternalTypeInfo;
|
||||
import org.apache.flink.table.types.logical.RowType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static org.apache.hudi.examples.quickstart.factory.ContinuousFileSourceFactory.CHECKPOINTS;
|
||||
|
||||
/**
|
||||
* A continuous file source that can trigger checkpoints continuously.
|
||||
*
|
||||
* <p>It loads the data in the specified file and split the data into number of checkpoints batches.
|
||||
* Say, if you want 4 checkpoints and there are 8 records in the file, the emit strategy is:
|
||||
*
|
||||
* <pre>
|
||||
* | 2 records | 2 records | 2 records | 2 records |
|
||||
* | cp1 | cp2 |cp3 | cp4 |
|
||||
* </pre>
|
||||
*
|
||||
* <p>If all the data are flushed out, it waits for the next checkpoint to finish and tear down the source.
|
||||
*/
|
||||
public class ContinuousFileSource implements ScanTableSource {
|
||||
|
||||
private final ResolvedSchema tableSchema;
|
||||
private final Path path;
|
||||
private final Configuration conf;
|
||||
|
||||
public ContinuousFileSource(
|
||||
ResolvedSchema tableSchema,
|
||||
Path path,
|
||||
Configuration conf) {
|
||||
this.tableSchema = tableSchema;
|
||||
this.path = path;
|
||||
this.conf = conf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
|
||||
return new DataStreamScanProvider() {
|
||||
|
||||
@Override
|
||||
public boolean isBounded() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DataStream<RowData> produceDataStream(StreamExecutionEnvironment execEnv) {
|
||||
final RowType rowType = (RowType) tableSchema.toSourceRowDataType().getLogicalType();
|
||||
JsonRowDataDeserializationSchema deserializationSchema = new JsonRowDataDeserializationSchema(
|
||||
rowType,
|
||||
InternalTypeInfo.of(rowType),
|
||||
false,
|
||||
true,
|
||||
TimestampFormat.ISO_8601);
|
||||
|
||||
return execEnv.addSource(new BoundedSourceFunction(path, conf.getInteger(CHECKPOINTS)))
|
||||
.name("continuous_file_source")
|
||||
.setParallelism(1)
|
||||
.map(record -> deserializationSchema.deserialize(record.getBytes(StandardCharsets.UTF_8)),
|
||||
InternalTypeInfo.of(rowType));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ChangelogMode getChangelogMode() {
|
||||
return ChangelogMode.insertOnly();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DynamicTableSource copy() {
|
||||
return new ContinuousFileSource(this.tableSchema, this.path, this.conf);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String asSummaryString() {
|
||||
return "ContinuousFileSource";
|
||||
}
|
||||
|
||||
/**
|
||||
* Source function that partition the data into given number checkpoints batches.
|
||||
*/
|
||||
public static class BoundedSourceFunction implements SourceFunction<String>, CheckpointListener {
|
||||
private final Path path;
|
||||
private List<String> dataBuffer;
|
||||
|
||||
private final int checkpoints;
|
||||
private final AtomicInteger currentCP = new AtomicInteger(0);
|
||||
|
||||
private volatile boolean isRunning = true;
|
||||
|
||||
public BoundedSourceFunction(Path path, int checkpoints) {
|
||||
this.path = path;
|
||||
this.checkpoints = checkpoints;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run(SourceContext<String> context) throws Exception {
|
||||
if (this.dataBuffer == null) {
|
||||
loadDataBuffer();
|
||||
}
|
||||
int oldCP = this.currentCP.get();
|
||||
boolean finish = false;
|
||||
while (isRunning) {
|
||||
int batchSize = this.dataBuffer.size() / this.checkpoints;
|
||||
int start = batchSize * oldCP;
|
||||
synchronized (context.getCheckpointLock()) {
|
||||
for (int i = start; i < start + batchSize; i++) {
|
||||
if (i >= this.dataBuffer.size()) {
|
||||
finish = true;
|
||||
break;
|
||||
// wait for the next checkpoint and exit
|
||||
}
|
||||
context.collect(this.dataBuffer.get(i));
|
||||
}
|
||||
}
|
||||
oldCP++;
|
||||
while (this.currentCP.get() < oldCP) {
|
||||
synchronized (context.getCheckpointLock()) {
|
||||
context.getCheckpointLock().wait(10);
|
||||
}
|
||||
}
|
||||
if (finish || !isRunning) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void cancel() {
|
||||
this.isRunning = false;
|
||||
}
|
||||
|
||||
private void loadDataBuffer() {
|
||||
try {
|
||||
this.dataBuffer = Files.readAllLines(Paths.get(this.path.toUri()));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Read file " + this.path + " error", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void notifyCheckpointComplete(long l) {
|
||||
this.currentCP.incrementAndGet();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,317 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.examples.quickstart.utils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.flink.configuration.ConfigOption;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.table.api.DataTypes;
|
||||
import org.apache.flink.table.api.TableSchema;
|
||||
import org.apache.flink.table.catalog.ResolvedSchema;
|
||||
import org.apache.flink.table.runtime.typeutils.RowDataSerializer;
|
||||
import org.apache.flink.table.types.DataType;
|
||||
import org.apache.flink.table.types.logical.RowType;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.examples.quickstart.factory.CollectSinkTableFactory;
|
||||
import org.apache.hudi.examples.quickstart.factory.ContinuousFileSourceFactory;
|
||||
import org.apache.hudi.streamer.FlinkStreamerConfig;
|
||||
|
||||
/**
|
||||
* Configurations for the test.
|
||||
*/
|
||||
public class QuickstartConfigurations {
|
||||
private QuickstartConfigurations() {
|
||||
}
|
||||
|
||||
public static final DataType ROW_DATA_TYPE = DataTypes.ROW(
|
||||
DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)),// record key
|
||||
DataTypes.FIELD("name", DataTypes.VARCHAR(10)),
|
||||
DataTypes.FIELD("age", DataTypes.INT()),
|
||||
DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field
|
||||
DataTypes.FIELD("partition", DataTypes.VARCHAR(10)))
|
||||
.notNull();
|
||||
|
||||
public static final RowType ROW_TYPE = (RowType) ROW_DATA_TYPE.getLogicalType();
|
||||
|
||||
public static final ResolvedSchema TABLE_SCHEMA = SchemaBuilder.instance()
|
||||
.fields(ROW_TYPE.getFieldNames(), ROW_DATA_TYPE.getChildren())
|
||||
.build();
|
||||
|
||||
private static final List<String> FIELDS = ROW_TYPE.getFields().stream()
|
||||
.map(RowType.RowField::asSummaryString).collect(Collectors.toList());
|
||||
|
||||
public static final DataType ROW_DATA_TYPE_WIDER = DataTypes.ROW(
|
||||
DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)),// record key
|
||||
DataTypes.FIELD("name", DataTypes.VARCHAR(10)),
|
||||
DataTypes.FIELD("age", DataTypes.INT()),
|
||||
DataTypes.FIELD("salary", DataTypes.DOUBLE()),
|
||||
DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field
|
||||
DataTypes.FIELD("partition", DataTypes.VARCHAR(10)))
|
||||
.notNull();
|
||||
|
||||
public static final RowType ROW_TYPE_WIDER = (RowType) ROW_DATA_TYPE_WIDER.getLogicalType();
|
||||
|
||||
public static String getCreateHoodieTableDDL(String tableName, Map<String, String> options) {
|
||||
return getCreateHoodieTableDDL(tableName, options, true, "partition");
|
||||
}
|
||||
|
||||
public static String getCreateHoodieTableDDL(
|
||||
String tableName,
|
||||
Map<String, String> options,
|
||||
boolean havePartition,
|
||||
String partitionField) {
|
||||
return getCreateHoodieTableDDL(tableName, FIELDS, options, havePartition, "uuid", partitionField);
|
||||
}
|
||||
|
||||
public static String getCreateHoodieTableDDL(
|
||||
String tableName,
|
||||
List<String> fields,
|
||||
Map<String, String> options,
|
||||
boolean havePartition,
|
||||
String pkField,
|
||||
String partitionField) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("create table ").append(tableName).append("(\n");
|
||||
for (String field : fields) {
|
||||
builder.append(" ").append(field).append(",\n");
|
||||
}
|
||||
builder.append(" PRIMARY KEY(").append(pkField).append(") NOT ENFORCED\n")
|
||||
.append(")\n");
|
||||
if (havePartition) {
|
||||
builder.append("PARTITIONED BY (`").append(partitionField).append("`)\n");
|
||||
}
|
||||
final String connector = options.computeIfAbsent("connector", k -> "hudi");
|
||||
builder.append("with (\n"
|
||||
+ " 'connector' = '").append(connector).append("'");
|
||||
options.forEach((k, v) -> builder.append(",\n")
|
||||
.append(" '").append(k).append("' = '").append(v).append("'"));
|
||||
builder.append("\n)");
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static String getCreateHudiCatalogDDL(final String catalogName, final String catalogPath) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("create catalog ").append(catalogName).append(" with (\n");
|
||||
builder.append(" 'type' = 'hudi',\n"
|
||||
+ " 'catalog.path' = '").append(catalogPath).append("'");
|
||||
builder.append("\n)");
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static String getFileSourceDDL(String tableName) {
|
||||
return getFileSourceDDL(tableName, "source-file.json");
|
||||
}
|
||||
|
||||
public static String getFileSourceDDL(String tableName, int checkpoints) {
|
||||
return getFileSourceDDL(tableName, "source-file.json", checkpoints);
|
||||
}
|
||||
|
||||
public static String getFileSourceDDL(String tableName, String fileName) {
|
||||
return getFileSourceDDL(tableName, fileName, 2);
|
||||
}
|
||||
|
||||
public static String getFileSourceDDL(String tableName, String fileName, int checkpoints) {
|
||||
String sourcePath = Objects.requireNonNull(Thread.currentThread()
|
||||
.getContextClassLoader().getResource(fileName)).toString();
|
||||
return "create table " + tableName + "(\n"
|
||||
+ " uuid varchar(20),\n"
|
||||
+ " name varchar(10),\n"
|
||||
+ " age int,\n"
|
||||
+ " ts timestamp(3),\n"
|
||||
+ " `partition` varchar(20)\n"
|
||||
+ ") with (\n"
|
||||
+ " 'connector' = '" + ContinuousFileSourceFactory.FACTORY_ID + "',\n"
|
||||
+ " 'path' = '" + sourcePath + "',\n"
|
||||
+ " 'checkpoints' = '" + checkpoints + "'\n"
|
||||
+ ")";
|
||||
}
|
||||
|
||||
public static String getCollectSinkDDL(String tableName) {
|
||||
return "create table " + tableName + "(\n"
|
||||
+ " uuid varchar(20),\n"
|
||||
+ " name varchar(10),\n"
|
||||
+ " age int,\n"
|
||||
+ " ts timestamp(3),\n"
|
||||
+ " `partition` varchar(20)\n"
|
||||
+ ") with (\n"
|
||||
+ " 'connector' = '" + CollectSinkTableFactory.FACTORY_ID + "'"
|
||||
+ ")";
|
||||
}
|
||||
|
||||
public static String getCollectSinkDDL(String tableName, TableSchema tableSchema) {
|
||||
final StringBuilder builder = new StringBuilder("create table " + tableName + "(\n");
|
||||
String[] fieldNames = tableSchema.getFieldNames();
|
||||
DataType[] fieldTypes = tableSchema.getFieldDataTypes();
|
||||
for (int i = 0; i < fieldNames.length; i++) {
|
||||
builder.append(" `")
|
||||
.append(fieldNames[i])
|
||||
.append("` ")
|
||||
.append(fieldTypes[i].toString());
|
||||
if (i != fieldNames.length - 1) {
|
||||
builder.append(",");
|
||||
}
|
||||
builder.append("\n");
|
||||
}
|
||||
final String withProps = ""
|
||||
+ ") with (\n"
|
||||
+ " 'connector' = '" + CollectSinkTableFactory.FACTORY_ID + "'\n"
|
||||
+ ")";
|
||||
builder.append(withProps);
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static String getCsvSourceDDL(String tableName, String fileName) {
|
||||
String sourcePath = Objects.requireNonNull(Thread.currentThread()
|
||||
.getContextClassLoader().getResource(fileName)).toString();
|
||||
return "create table " + tableName + "(\n"
|
||||
+ " uuid varchar(20),\n"
|
||||
+ " name varchar(10),\n"
|
||||
+ " age int,\n"
|
||||
+ " ts timestamp(3),\n"
|
||||
+ " `partition` varchar(20)\n"
|
||||
+ ") with (\n"
|
||||
+ " 'connector' = 'filesystem',\n"
|
||||
+ " 'path' = '" + sourcePath + "',\n"
|
||||
+ " 'format' = 'csv'\n"
|
||||
+ ")";
|
||||
}
|
||||
|
||||
public static final RowDataSerializer SERIALIZER = new RowDataSerializer(ROW_TYPE);
|
||||
|
||||
public static Configuration getDefaultConf(String tablePath) {
|
||||
Configuration conf = new Configuration();
|
||||
conf.setString(FlinkOptions.PATH, tablePath);
|
||||
conf.setString(FlinkOptions.SOURCE_AVRO_SCHEMA_PATH,
|
||||
Objects.requireNonNull(Thread.currentThread()
|
||||
.getContextClassLoader().getResource("test_read_schema.avsc")).toString());
|
||||
conf.setString(FlinkOptions.TABLE_NAME, "TestHoodieTable");
|
||||
conf.setString(FlinkOptions.PARTITION_PATH_FIELD, "partition");
|
||||
return conf;
|
||||
}
|
||||
|
||||
public static FlinkStreamerConfig getDefaultStreamerConf(String tablePath) {
|
||||
FlinkStreamerConfig streamerConf = new FlinkStreamerConfig();
|
||||
streamerConf.targetBasePath = tablePath;
|
||||
streamerConf.sourceAvroSchemaPath = Objects.requireNonNull(Thread.currentThread()
|
||||
.getContextClassLoader().getResource("test_read_schema.avsc")).toString();
|
||||
streamerConf.targetTableName = "TestHoodieTable";
|
||||
streamerConf.partitionPathField = "partition";
|
||||
streamerConf.tableType = "COPY_ON_WRITE";
|
||||
streamerConf.checkpointInterval = 4000L;
|
||||
return streamerConf;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the tool to build hoodie table DDL.
|
||||
*/
|
||||
public static Sql sql(String tableName) {
|
||||
return new Sql(tableName);
|
||||
}
|
||||
|
||||
public static Catalog catalog(String catalogName) {
|
||||
return new Catalog(catalogName);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Utilities
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Tool to build hoodie table DDL with schema {@link #TABLE_SCHEMA}.
|
||||
*/
|
||||
public static class Sql {
|
||||
private final Map<String, String> options;
|
||||
private final String tableName;
|
||||
private List<String> fields = new ArrayList<>();
|
||||
private boolean withPartition = true;
|
||||
private String pkField = "uuid";
|
||||
private String partitionField = "partition";
|
||||
|
||||
public Sql(String tableName) {
|
||||
options = new HashMap<>();
|
||||
this.tableName = tableName;
|
||||
}
|
||||
|
||||
public Sql option(ConfigOption<?> option, Object val) {
|
||||
this.options.put(option.key(), val.toString());
|
||||
return this;
|
||||
}
|
||||
|
||||
public Sql option(String key, Object val) {
|
||||
this.options.put(key, val.toString());
|
||||
return this;
|
||||
}
|
||||
|
||||
public Sql options(Map<String, String> options) {
|
||||
this.options.putAll(options);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Sql noPartition() {
|
||||
this.withPartition = false;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Sql pkField(String pkField) {
|
||||
this.pkField = pkField;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Sql partitionField(String partitionField) {
|
||||
this.partitionField = partitionField;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Sql field(String fieldSchema) {
|
||||
fields.add(fieldSchema);
|
||||
return this;
|
||||
}
|
||||
|
||||
public String end() {
|
||||
if (this.fields.size() == 0) {
|
||||
this.fields = FIELDS;
|
||||
}
|
||||
return QuickstartConfigurations.getCreateHoodieTableDDL(this.tableName, this.fields, options,
|
||||
this.withPartition, this.pkField, this.partitionField);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Catalog {
|
||||
private final String catalogName;
|
||||
private String catalogPath = ".";
|
||||
|
||||
public Catalog(String catalogName) {
|
||||
this.catalogName = catalogName;
|
||||
}
|
||||
|
||||
public Catalog catalogPath(String catalogPath) {
|
||||
this.catalogPath = catalogPath;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String end() {
|
||||
return QuickstartConfigurations.getCreateHudiCatalogDDL(catalogName, catalogPath);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.examples.quickstart.utils;
|
||||
|
||||
import org.apache.flink.table.catalog.Column;
|
||||
import org.apache.flink.table.catalog.ResolvedSchema;
|
||||
import org.apache.flink.table.catalog.UniqueConstraint;
|
||||
import org.apache.flink.table.catalog.WatermarkSpec;
|
||||
import org.apache.flink.table.types.DataType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* Builder for {@link ResolvedSchema}.
|
||||
*/
|
||||
public class SchemaBuilder {
|
||||
private List<Column> columns;
|
||||
private List<WatermarkSpec> watermarkSpecs;
|
||||
private UniqueConstraint constraint;
|
||||
|
||||
public static SchemaBuilder instance() {
|
||||
return new SchemaBuilder();
|
||||
}
|
||||
|
||||
private SchemaBuilder() {
|
||||
this.columns = new ArrayList<>();
|
||||
this.watermarkSpecs = new ArrayList<>();
|
||||
}
|
||||
|
||||
public SchemaBuilder field(String name, DataType type) {
|
||||
this.columns.add(Column.physical(name, type));
|
||||
return this;
|
||||
}
|
||||
|
||||
public SchemaBuilder fields(List<String> names, List<DataType> types) {
|
||||
List<Column> columns = IntStream.range(0, names.size())
|
||||
.mapToObj(idx -> Column.physical(names.get(idx), types.get(idx)))
|
||||
.collect(Collectors.toList());
|
||||
this.columns.addAll(columns);
|
||||
return this;
|
||||
}
|
||||
|
||||
public SchemaBuilder primaryKey(String... columns) {
|
||||
this.constraint = UniqueConstraint.primaryKey("pk", Arrays.asList(columns));
|
||||
return this;
|
||||
}
|
||||
|
||||
public ResolvedSchema build() {
|
||||
return new ResolvedSchema(columns, watermarkSpecs, constraint);
|
||||
}
|
||||
}
|
||||
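A hedged usage sketch of SchemaBuilder; the column names and the Flink DataTypes import are assumptions for illustration.

// Assumes: import org.apache.flink.table.api.DataTypes;
ResolvedSchema schema = SchemaBuilder.instance()
    .field("uuid", DataTypes.STRING())   // assumed physical column
    .field("age", DataTypes.INT())       // assumed physical column
    .primaryKey("uuid")                  // declares a primary key constraint named "pk"
    .build();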
@@ -0,0 +1,18 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
org.apache.hudi.examples.quickstart.factory.ContinuousFileSourceFactory
|
||||
org.apache.hudi.examples.quickstart.factory.CollectSinkTableFactory
|
||||
@@ -0,0 +1,8 @@
|
||||
{"uuid": "id1", "name": "Danny", "age": 23, "ts": "1970-01-01T00:00:01", "partition": "par1"}
|
||||
{"uuid": "id2", "name": "Stephen", "age": 33, "ts": "1970-01-01T00:00:02", "partition": "par1"}
|
||||
{"uuid": "id3", "name": "Julian", "age": 53, "ts": "1970-01-01T00:00:03", "partition": "par2"}
|
||||
{"uuid": "id4", "name": "Fabian", "age": 31, "ts": "1970-01-01T00:00:04", "partition": "par2"}
|
||||
{"uuid": "id5", "name": "Sophia", "age": 18, "ts": "1970-01-01T00:00:05", "partition": "par3"}
|
||||
{"uuid": "id6", "name": "Emma", "age": 20, "ts": "1970-01-01T00:00:06", "partition": "par3"}
|
||||
{"uuid": "id7", "name": "Bob", "age": 44, "ts": "1970-01-01T00:00:07", "partition": "par4"}
|
||||
{"uuid": "id8", "name": "Han", "age": 56, "ts": "1970-01-01T00:00:08", "partition": "par4"}
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.examples.quickstart;
|
||||
|
||||
import static org.apache.hudi.examples.quickstart.TestQuickstartData.assertRowsEquals;
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import org.apache.flink.table.api.TableEnvironment;
|
||||
import org.apache.flink.test.util.AbstractTestBase;
|
||||
import org.apache.flink.types.Row;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.EnumSource;
|
||||
|
||||
/**
|
||||
* IT cases for Hoodie table source and sink.
|
||||
*/
|
||||
public class TestHoodieFlinkQuickstart extends AbstractTestBase {
|
||||
private final HoodieFlinkQuickstart flinkQuickstart = HoodieFlinkQuickstart.instance();
|
||||
|
||||
@BeforeEach
|
||||
void beforeEach() {
|
||||
flinkQuickstart.initEnv();
|
||||
}
|
||||
|
||||
@TempDir
|
||||
File tempFile;
|
||||
|
||||
@ParameterizedTest
|
||||
@EnumSource(value = HoodieTableType.class)
|
||||
void testHoodieFlinkQuickstart(HoodieTableType tableType) throws Exception {
|
||||
// create filesystem table named source
|
||||
flinkQuickstart.createFileSource();
|
||||
|
||||
// create hudi table
|
||||
flinkQuickstart.createHudiTable(tempFile.getAbsolutePath(), "t1", tableType);
|
||||
|
||||
// insert data
|
||||
List<Row> rows = flinkQuickstart.insertData();
|
||||
assertRowsEquals(rows, TestQuickstartData.DATA_SET_SOURCE_INSERT_LATEST_COMMIT);
|
||||
|
||||
// query data
|
||||
List<Row> rows1 = flinkQuickstart.queryData();
|
||||
assertRowsEquals(rows1, TestQuickstartData.DATA_SET_SOURCE_INSERT_LATEST_COMMIT);
|
||||
|
||||
// update data
|
||||
List<Row> rows2 = flinkQuickstart.updateData();
|
||||
assertRowsEquals(rows2, TestQuickstartData.DATA_SET_SOURCE_INSERT_LATEST_COMMIT);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,422 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.examples.quickstart;
|
||||
|
||||
import static junit.framework.TestCase.assertEquals;
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Properties;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.flink.table.data.RowData;
|
||||
import org.apache.flink.table.data.StringData;
|
||||
import org.apache.flink.table.data.TimestampData;
|
||||
import org.apache.flink.table.data.binary.BinaryRowData;
|
||||
import org.apache.flink.table.data.conversion.DataStructureConverter;
|
||||
import org.apache.flink.table.data.conversion.DataStructureConverters;
|
||||
import org.apache.flink.table.data.writer.BinaryRowWriter;
|
||||
import org.apache.flink.table.data.writer.BinaryWriter;
|
||||
import org.apache.flink.table.runtime.typeutils.InternalSerializers;
|
||||
import org.apache.flink.table.types.logical.LogicalType;
|
||||
import org.apache.flink.table.types.logical.RowType;
|
||||
import org.apache.flink.types.Row;
|
||||
import org.apache.flink.types.RowKind;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.common.config.HoodieCommonConfig;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
|
||||
import org.apache.hudi.examples.quickstart.utils.QuickstartConfigurations;
|
||||
import org.apache.parquet.Strings;
|
||||
import org.apache.parquet.avro.AvroParquetReader;
|
||||
import org.apache.parquet.hadoop.ParquetReader;
|
||||
|
||||
/**
|
||||
* Data set for testing, plus some utilities to check the results.
|
||||
*/
|
||||
public class TestQuickstartData {
|
||||
|
||||
public static List<RowData> DATA_SET_INSERT_DUPLICATES = new ArrayList<>();
|
||||
|
||||
static {
|
||||
IntStream.range(0, 5).forEach(i -> DATA_SET_INSERT_DUPLICATES.add(
|
||||
insertRow(StringData.fromString("id1"), StringData.fromString("Danny"), 23,
|
||||
TimestampData.fromEpochMillis(1), StringData.fromString("par1"))));
|
||||
}
|
||||
|
||||
public static List<RowData> DATA_SET_INSERT_SAME_KEY = new ArrayList<>();
|
||||
|
||||
static {
|
||||
IntStream.range(0, 5).forEach(i -> DATA_SET_INSERT_SAME_KEY.add(
|
||||
insertRow(StringData.fromString("id1"), StringData.fromString("Danny"), 23,
|
||||
TimestampData.fromEpochMillis(i), StringData.fromString("par1"))));
|
||||
}
|
||||
|
||||
// Data set of the latest commit in source-file.json.
|
||||
public static List<RowData> DATA_SET_SOURCE_INSERT_LATEST_COMMIT = Arrays.asList(
|
||||
insertRow(StringData.fromString("id5"), StringData.fromString("Sophia"), 18,
|
||||
TimestampData.fromEpochMillis(5000), StringData.fromString("par3")),
|
||||
insertRow(StringData.fromString("id6"), StringData.fromString("Emma"), 20,
|
||||
TimestampData.fromEpochMillis(6000), StringData.fromString("par3")),
|
||||
insertRow(StringData.fromString("id7"), StringData.fromString("Bob"), 44,
|
||||
TimestampData.fromEpochMillis(7000), StringData.fromString("par4")),
|
||||
insertRow(StringData.fromString("id8"), StringData.fromString("Han"), 56,
|
||||
TimestampData.fromEpochMillis(8000), StringData.fromString("par4"))
|
||||
);
|
||||
|
||||
public static List<RowData> DATA_SET_DISORDER_UPDATE_DELETE = Arrays.asList(
|
||||
// DISORDER UPDATE
|
||||
updateAfterRow(StringData.fromString("id1"), StringData.fromString("Danny"), 21,
|
||||
TimestampData.fromEpochMillis(3), StringData.fromString("par1")),
|
||||
updateAfterRow(StringData.fromString("id1"), StringData.fromString("Danny"), 20,
|
||||
TimestampData.fromEpochMillis(2), StringData.fromString("par1")),
|
||||
updateBeforeRow(StringData.fromString("id1"), StringData.fromString("Danny"), 23,
|
||||
TimestampData.fromEpochMillis(1), StringData.fromString("par1")),
|
||||
updateBeforeRow(StringData.fromString("id1"), StringData.fromString("Danny"), 20,
|
||||
TimestampData.fromEpochMillis(2), StringData.fromString("par1")),
|
||||
updateAfterRow(StringData.fromString("id1"), StringData.fromString("Danny"), 22,
|
||||
TimestampData.fromEpochMillis(4), StringData.fromString("par1")),
|
||||
updateBeforeRow(StringData.fromString("id1"), StringData.fromString("Danny"), 21,
|
||||
TimestampData.fromEpochMillis(3), StringData.fromString("par1")),
|
||||
// DISORDER DELETE
|
||||
deleteRow(StringData.fromString("id1"), StringData.fromString("Danny"), 22,
|
||||
TimestampData.fromEpochMillis(2), StringData.fromString("par1"))
|
||||
);
|
||||
|
||||
public static List<RowData> dataSetInsert(int... ids) {
|
||||
List<RowData> inserts = new ArrayList<>();
|
||||
Arrays.stream(ids).forEach(i -> inserts.add(
|
||||
insertRow(StringData.fromString("id" + i), StringData.fromString("Danny"), 23,
|
||||
TimestampData.fromEpochMillis(i), StringData.fromString("par1"))));
|
||||
return inserts;
|
||||
}
|
||||
|
||||
private static Integer toIdSafely(Object id) {
|
||||
if (id == null) {
|
||||
return -1;
|
||||
}
|
||||
final String idStr = id.toString();
|
||||
if (idStr.startsWith("id")) {
|
||||
return Integer.parseInt(idStr.substring(2));
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the string format of a list of RowData.
|
||||
*/
|
||||
public static String rowDataToString(List<RowData> rows) {
|
||||
DataStructureConverter<Object, Object> converter =
|
||||
DataStructureConverters.getConverter(QuickstartConfigurations.ROW_DATA_TYPE);
|
||||
return rows.stream()
|
||||
.sorted(Comparator.comparing(o -> toIdSafely(o.getString(0))))
|
||||
.map(row -> converter.toExternal(row).toString())
|
||||
.collect(Collectors.toList()).toString();
|
||||
}
|
||||
|
||||
private static String toStringSafely(Object obj) {
|
||||
return obj == null ? "null" : obj.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the {@code rows} using the field at index 0 and asserts
|
||||
* that it equals the expected string {@code expected}.
|
||||
*
|
||||
* @param rows Actual result rows
|
||||
* @param expected Expected string of the sorted rows
|
||||
*/
|
||||
public static void assertRowsEquals(List<Row> rows, String expected) {
|
||||
assertRowsEquals(rows, expected, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the {@code rows} using the field at index 0 and asserts
|
||||
* that it equals the expected string {@code expected}.
|
||||
*
|
||||
* @param rows Actual result rows
|
||||
* @param expected Expected string of the sorted rows
|
||||
* @param withChangeFlag Whether compares with change flags
|
||||
*/
|
||||
public static void assertRowsEquals(List<Row> rows, String expected, boolean withChangeFlag) {
|
||||
String rowsString = rows.stream()
|
||||
.sorted(Comparator.comparing(o -> toStringSafely(o.getField(0))))
|
||||
.map(row -> {
|
||||
final String rowStr = row.toString();
|
||||
if (withChangeFlag) {
|
||||
return row.getKind().shortString() + "(" + rowStr + ")";
|
||||
} else {
|
||||
return rowStr;
|
||||
}
|
||||
})
|
||||
.collect(Collectors.toList()).toString();
|
||||
assertThat(rowsString, is(expected));
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the {@code rows} using the field at index {@code orderingPos} and asserts
|
||||
* that it equals the expected string {@code expected}.
|
||||
*
|
||||
* @param rows Actual result rows
|
||||
* @param expected Expected string of the sorted rows
|
||||
* @param orderingPos Field position for ordering
|
||||
*/
|
||||
public static void assertRowsEquals(List<Row> rows, String expected, int orderingPos) {
|
||||
String rowsString = rows.stream()
|
||||
.sorted(Comparator.comparing(o -> toStringSafely(o.getField(orderingPos))))
|
||||
.collect(Collectors.toList()).toString();
|
||||
assertThat(rowsString, is(expected));
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the {@code rows} using the field at index 0 and asserts
|
||||
* that it equals the expected row data list {@code expected}.
|
||||
*
|
||||
* @param rows Actual result rows
|
||||
* @param expected Expected row data list
|
||||
*/
|
||||
public static void assertRowsEquals(List<Row> rows, List<RowData> expected) {
|
||||
String rowsString = rows.stream()
|
||||
.sorted(Comparator.comparing(o -> toIdSafely(o.getField(0))))
|
||||
.collect(Collectors.toList()).toString();
|
||||
assertThat(rowsString, is(rowDataToString(expected)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the {@code rows} using the field at index 0 and asserts
|
||||
* that it equals the expected string {@code expected}.
|
||||
*
|
||||
* @param rows Actual result rows
|
||||
* @param expected Expected string of the sorted rows
|
||||
*/
|
||||
public static void assertRowDataEquals(List<RowData> rows, String expected) {
|
||||
String rowsString = rowDataToString(rows);
|
||||
assertThat(rowsString, is(expected));
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the {@code rows} using the field at index 0 and asserts
|
||||
* that it equals the expected row data list {@code expected}.
|
||||
*
|
||||
* @param rows Actual result rows
|
||||
* @param expected Expected row data list
|
||||
*/
|
||||
public static void assertRowDataEquals(List<RowData> rows, List<RowData> expected) {
|
||||
String rowsString = rowDataToString(rows);
|
||||
assertThat(rowsString, is(rowDataToString(expected)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the source data set is written as expected.
|
||||
*
|
||||
* <p>Note: Replace it with the Flink reader when it is supported.
|
||||
*
|
||||
* @param baseFile The file base to check, should be a directory
|
||||
* @param expected The expected results mapping, the key should be the partition path
|
||||
* and the value should be the expected list of values for that partition
|
||||
*/
|
||||
public static void checkWrittenData(File baseFile, Map<String, String> expected) throws IOException {
|
||||
checkWrittenData(baseFile, expected, 4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the source data set is written as expected.
|
||||
*
|
||||
* <p>Note: Replace it with the Flink reader when it is supported.
|
||||
*
|
||||
* @param baseFile The file base to check, should be a directory
|
||||
* @param expected The expected results mapping, the key should be the partition path
|
||||
* and the value should be the expected list of values for that partition
|
||||
* @param partitions The expected partition number
|
||||
*/
|
||||
public static void checkWrittenData(
|
||||
File baseFile,
|
||||
Map<String, String> expected,
|
||||
int partitions) throws IOException {
|
||||
assert baseFile.isDirectory();
|
||||
FileFilter filter = file -> !file.getName().startsWith(".");
|
||||
File[] partitionDirs = baseFile.listFiles(filter);
|
||||
assertNotNull(partitionDirs);
|
||||
assertThat(partitionDirs.length, is(partitions));
|
||||
for (File partitionDir : partitionDirs) {
|
||||
File[] dataFiles = partitionDir.listFiles(filter);
|
||||
assertNotNull(dataFiles);
|
||||
File latestDataFile = Arrays.stream(dataFiles)
|
||||
.max(Comparator.comparing(f -> FSUtils.getCommitTime(f.getName())))
|
||||
.orElse(dataFiles[0]);
|
||||
ParquetReader<GenericRecord> reader = AvroParquetReader
|
||||
.<GenericRecord>builder(new Path(latestDataFile.getAbsolutePath())).build();
|
||||
List<String> readBuffer = new ArrayList<>();
|
||||
GenericRecord nextRecord = reader.read();
|
||||
while (nextRecord != null) {
|
||||
readBuffer.add(filterOutVariables(nextRecord));
|
||||
nextRecord = reader.read();
|
||||
}
|
||||
readBuffer.sort(Comparator.naturalOrder());
|
||||
assertThat(readBuffer.toString(), is(expected.get(partitionDir.getName())));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the MERGE_ON_READ source data set is written as expected.
|
||||
*
|
||||
* <p>Note: Replace it with the Flink reader when it is supported.
|
||||
*
|
||||
* @param fs The file system
|
||||
* @param latestInstant The latest committed instant of current table
|
||||
* @param baseFile The file base to check, should be a directory
|
||||
* @param expected The expected results mapping, the key should be the partition path
|
||||
* @param partitions The expected partition number
|
||||
* @param schema The read schema
|
||||
*/
|
||||
public static void checkWrittenDataMOR(
|
||||
FileSystem fs,
|
||||
String latestInstant,
|
||||
File baseFile,
|
||||
Map<String, String> expected,
|
||||
int partitions,
|
||||
Schema schema) {
|
||||
assert baseFile.isDirectory() : "Base path should be a directory";
|
||||
FileFilter partitionFilter = file -> !file.getName().startsWith(".");
|
||||
File[] partitionDirs = baseFile.listFiles(partitionFilter);
|
||||
assertNotNull(partitionDirs);
|
||||
assertThat(partitionDirs.length, is(partitions));
|
||||
for (File partitionDir : partitionDirs) {
|
||||
File[] dataFiles = partitionDir.listFiles(file ->
|
||||
file.getName().contains(".log.") && !file.getName().startsWith(".."));
|
||||
assertNotNull(dataFiles);
|
||||
HoodieMergedLogRecordScanner scanner = getScanner(
|
||||
fs, baseFile.getPath(), Arrays.stream(dataFiles).map(File::getAbsolutePath)
|
||||
.sorted(Comparator.naturalOrder()).collect(Collectors.toList()),
|
||||
schema, latestInstant);
|
||||
List<String> readBuffer = scanner.getRecords().values().stream()
|
||||
.map(hoodieRecord -> {
|
||||
try {
|
||||
// in case it is a delete
|
||||
GenericRecord record = (GenericRecord) hoodieRecord.getData()
|
||||
.getInsertValue(schema, new Properties())
|
||||
.orElse(null);
|
||||
return record == null ? (String) null : filterOutVariables(record);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
})
|
||||
.filter(Objects::nonNull)
|
||||
.sorted(Comparator.naturalOrder())
|
||||
.collect(Collectors.toList());
|
||||
assertThat(readBuffer.toString(), is(expected.get(partitionDir.getName())));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the scanner to read avro log files.
|
||||
*/
|
||||
private static HoodieMergedLogRecordScanner getScanner(
|
||||
FileSystem fs,
|
||||
String basePath,
|
||||
List<String> logPaths,
|
||||
Schema readSchema,
|
||||
String instant) {
|
||||
return HoodieMergedLogRecordScanner.newBuilder()
|
||||
.withFileSystem(fs)
|
||||
.withBasePath(basePath)
|
||||
.withLogFilePaths(logPaths)
|
||||
.withReaderSchema(readSchema)
|
||||
.withLatestInstantTime(instant)
|
||||
.withReadBlocksLazily(false)
|
||||
.withReverseReader(false)
|
||||
.withBufferSize(16 * 1024 * 1024)
|
||||
.withMaxMemorySizeInBytes(1024 * 1024L)
|
||||
.withSpillableMapBasePath("/tmp/")
|
||||
.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
|
||||
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Filters out variable fields such as the file name.
|
||||
*/
|
||||
private static String filterOutVariables(GenericRecord genericRecord) {
|
||||
List<String> fields = new ArrayList<>();
|
||||
fields.add(genericRecord.get("_hoodie_record_key").toString());
|
||||
fields.add(genericRecord.get("_hoodie_partition_path").toString());
|
||||
fields.add(genericRecord.get("uuid").toString());
|
||||
fields.add(genericRecord.get("name").toString());
|
||||
fields.add(genericRecord.get("age").toString());
|
||||
fields.add(genericRecord.get("ts").toString());
|
||||
fields.add(genericRecord.get("partition").toString());
|
||||
return Strings.join(fields, ",");
|
||||
}
|
||||
|
||||
public static BinaryRowData insertRow(Object... fields) {
|
||||
return insertRow(QuickstartConfigurations.ROW_TYPE, fields);
|
||||
}
|
||||
|
||||
public static BinaryRowData insertRow(RowType rowType, Object... fields) {
|
||||
LogicalType[] types = rowType.getFields().stream().map(RowType.RowField::getType)
|
||||
.toArray(LogicalType[]::new);
|
||||
assertEquals(
|
||||
"Filed count inconsistent with type information",
|
||||
fields.length,
|
||||
types.length);
|
||||
BinaryRowData row = new BinaryRowData(fields.length);
|
||||
BinaryRowWriter writer = new BinaryRowWriter(row);
|
||||
writer.reset();
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
Object field = fields[i];
|
||||
if (field == null) {
|
||||
writer.setNullAt(i);
|
||||
} else {
|
||||
BinaryWriter.write(writer, i, field, types[i], InternalSerializers.create(types[i]));
|
||||
}
|
||||
}
|
||||
writer.complete();
|
||||
return row;
|
||||
}
|
||||
|
||||
private static BinaryRowData deleteRow(Object... fields) {
|
||||
BinaryRowData rowData = insertRow(fields);
|
||||
rowData.setRowKind(RowKind.DELETE);
|
||||
return rowData;
|
||||
}
|
||||
|
||||
private static BinaryRowData updateBeforeRow(Object... fields) {
|
||||
BinaryRowData rowData = insertRow(fields);
|
||||
rowData.setRowKind(RowKind.UPDATE_BEFORE);
|
||||
return rowData;
|
||||
}
|
||||
|
||||
private static BinaryRowData updateAfterRow(Object... fields) {
|
||||
BinaryRowData rowData = insertRow(fields);
|
||||
rowData.setRowKind(RowKind.UPDATE_AFTER);
|
||||
return rowData;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
org.apache.hudi.examples.quickstart.factory.ContinuousFileSourceFactory
|
||||
org.apache.hudi.examples.quickstart.factory.CollectSinkTableFactory
|
||||
@@ -0,0 +1,30 @@
|
||||
###
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
###
|
||||
log4j.rootLogger=WARN, CONSOLE
|
||||
log4j.logger.org.apache.hudi=DEBUG
|
||||
log4j.logger.org.apache.hadoop.hbase=ERROR
|
||||
|
||||
# CONSOLE is set to be a ConsoleAppender.
|
||||
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
|
||||
# CONSOLE uses PatternLayout.
|
||||
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c %x - %m%n
|
||||
log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
|
||||
log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
|
||||
log4j.appender.CONSOLE.filter.a.LevelMin=WARN
|
||||
log4j.appender.CONSOLE.filter.a.LevelMax=FATAL
|
||||
@@ -0,0 +1,31 @@
|
||||
###
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
###
|
||||
log4j.rootLogger=INFO, CONSOLE
|
||||
log4j.logger.org.apache=INFO
|
||||
log4j.logger.org.apache.hudi=DEBUG
|
||||
log4j.logger.org.apache.hadoop.hbase=ERROR
|
||||
|
||||
# CONSOLE is set to be a ConsoleAppender.
|
||||
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
|
||||
# CONSOLE uses PatternLayout.
|
||||
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
|
||||
log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
|
||||
log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
|
||||
log4j.appender.CONSOLE.filter.a.LevelMin=INFO
|
||||
log4j.appender.CONSOLE.filter.a.LevelMax=FATAL
|
||||
129
hudi-examples/hudi-examples-java/pom.xml
Normal file
@@ -0,0 +1,129 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>hudi-examples</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<version>0.11.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>hudi-examples-java</artifactId>
|
||||
|
||||
<properties>
|
||||
<main.basedir>${project.parent.basedir}</main.basedir>
|
||||
<checkstyle.skip>true</checkstyle.skip>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>src/main/resources</directory>
|
||||
</resource>
|
||||
</resources>
|
||||
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>copy-dependencies</id>
|
||||
<phase>prepare-package</phase>
|
||||
<goals>
|
||||
<goal>copy-dependencies</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<outputDirectory>${project.build.directory}/lib</outputDirectory>
|
||||
<overWriteReleases>true</overWriteReleases>
|
||||
<overWriteSnapshots>true</overWriteSnapshots>
|
||||
<overWriteIfNewer>true</overWriteIfNewer>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
<phase>process-resources</phase>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>compile</phase>
|
||||
<goals>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>test-jar</goal>
|
||||
</goals>
|
||||
<phase>test-compile</phase>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<skip>false</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.rat</groupId>
|
||||
<artifactId>apache-rat-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-examples-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-java-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
283
hudi-examples/hudi-examples-spark/pom.xml
Normal file
@@ -0,0 +1,283 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>hudi-examples</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<version>0.11.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>hudi-examples-spark</artifactId>
|
||||
|
||||
<properties>
|
||||
<main.basedir>${project.parent.basedir}</main.basedir>
|
||||
<checkstyle.skip>true</checkstyle.skip>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>src/main/resources</directory>
|
||||
</resource>
|
||||
</resources>
|
||||
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>copy-dependencies</id>
|
||||
<phase>prepare-package</phase>
|
||||
<goals>
|
||||
<goal>copy-dependencies</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<outputDirectory>${project.build.directory}/lib</outputDirectory>
|
||||
<overWriteReleases>true</overWriteReleases>
|
||||
<overWriteSnapshots>true</overWriteSnapshots>
|
||||
<overWriteIfNewer>true</overWriteIfNewer>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
<phase>process-resources</phase>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>compile</phase>
|
||||
<goals>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>test-jar</goal>
|
||||
</goals>
|
||||
<phase>test-compile</phase>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<skip>false</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.rat</groupId>
|
||||
<artifactId>apache-rat-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.scala-lang</groupId>
|
||||
<artifactId>scala-library</artifactId>
|
||||
<version>${scala.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-examples-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>*</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-cli</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-java-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-utilities_${scala.binary.version}</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-mr</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-timeline-service</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-avro_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet -->
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hadoop</artifactId>
|
||||
<version>${parquet.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Avro -->
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-common</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-exec</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>provided</scope>
|
||||
<classifier>${hive.exec.classifier}</classifier>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.mail</groupId>
|
||||
<artifactId>mail</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Junit dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-api</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-engine</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.vintage</groupId>
|
||||
<artifactId>junit-vintage-engine</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-params</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-junit-jupiter</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Hudi Tests -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
@@ -23,7 +23,6 @@ import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.utilities.schema.SchemaProvider;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
|
||||
|
||||
/**
|
||||
* The example SchemaProvider for the example JSON data from Uber.
|
||||
*/
|
||||
@@ -0,0 +1,227 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.examples.quickstart;
|
||||
|
||||
import static org.apache.hudi.config.HoodieWriteConfig.TBL_NAME;
|
||||
import static org.apache.spark.sql.SaveMode.Append;
|
||||
import static org.apache.spark.sql.SaveMode.Overwrite;
|
||||
import java.util.List;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.apache.hudi.common.model.HoodieAvroPayload;
|
||||
import org.apache.hudi.common.model.WriteOperationType;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.examples.common.HoodieExampleDataGenerator;
|
||||
import org.apache.hudi.examples.common.HoodieExampleSparkUtils;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.Function;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
public final class HoodieSparkQuickstart {
|
||||
|
||||
private HoodieSparkQuickstart() {
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
if (args.length < 2) {
|
||||
System.err.println("Usage: HoodieWriteClientExample <tablePath> <tableName>");
|
||||
System.exit(1);
|
||||
}
|
||||
String tablePath = args[0];
|
||||
String tableName = args[1];
|
||||
|
||||
SparkSession spark = HoodieExampleSparkUtils.defaultSparkSession("Hudi Spark basic example");
|
||||
SparkConf sparkConf = HoodieExampleSparkUtils.defaultSparkConf("hoodie-client-example");
|
||||
|
||||
try (JavaSparkContext jsc = new JavaSparkContext(sparkConf)) {
|
||||
final HoodieExampleDataGenerator<HoodieAvroPayload> dataGen = new HoodieExampleDataGenerator<>();
|
||||
|
||||
insertData(spark, jsc, tablePath, tableName, dataGen);
|
||||
updateData(spark, jsc, tablePath, tableName, dataGen);
|
||||
queryData(spark, jsc, tablePath, tableName, dataGen);
|
||||
|
||||
incrementalQuery(spark, tablePath, tableName);
|
||||
pointInTimeQuery(spark, tablePath, tableName);
|
||||
|
||||
delete(spark, tablePath, tableName);
|
||||
deleteByPartition(spark, tablePath, tableName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate some new trips, load them into a DataFrame and write the DataFrame into the Hudi dataset as below.
|
||||
*/
|
||||
public static void insertData(SparkSession spark, JavaSparkContext jsc, String tablePath, String tableName,
|
||||
HoodieExampleDataGenerator<HoodieAvroPayload> dataGen) {
|
||||
String commitTime = Long.toString(System.currentTimeMillis());
|
||||
List<String> inserts = dataGen.convertToStringList(dataGen.generateInserts(commitTime, 20));
|
||||
Dataset<Row> df = spark.read().json(jsc.parallelize(inserts, 1));
|
||||
df.write().format("org.apache.hudi").
|
||||
options(QuickstartUtils.getQuickstartWriteConfigs()).
|
||||
option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts").
|
||||
option(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid").
|
||||
option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "partitionpath").
|
||||
option(TBL_NAME.key(), tableName).
|
||||
mode(Overwrite).
|
||||
save(tablePath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the data files into a DataFrame.
|
||||
*/
|
||||
public static void queryData(SparkSession spark, JavaSparkContext jsc, String tablePath, String tableName,
|
||||
HoodieExampleDataGenerator<HoodieAvroPayload> dataGen) {
|
||||
Dataset<Row> roViewDF = spark.
|
||||
read().
|
||||
format("org.apache.hudi").
|
||||
load(tablePath + "/*/*/*/*");
|
||||
|
||||
roViewDF.createOrReplaceTempView("hudi_ro_table");
|
||||
|
||||
spark.sql("select fare, begin_lon, begin_lat, ts from hudi_ro_table where fare > 20.0").show();
|
||||
// +-----------------+-------------------+-------------------+---+
|
||||
// | fare| begin_lon| begin_lat| ts|
|
||||
// +-----------------+-------------------+-------------------+---+
|
||||
// |98.88075495133515|0.39556048623031603|0.17851135255091155|0.0|
|
||||
// ...
|
||||
|
||||
spark.sql(
|
||||
"select _hoodie_commit_time, _hoodie_record_key, _hoodie_partition_path, rider, driver, fare from hudi_ro_table")
|
||||
.show();
|
||||
// +-------------------+--------------------+----------------------+-------------------+--------------------+------------------+
|
||||
// |_hoodie_commit_time| _hoodie_record_key|_hoodie_partition_path| rider| driver| fare|
|
||||
// +-------------------+--------------------+----------------------+-------------------+--------------------+------------------+
|
||||
// | 20191231181501|31cafb9f-0196-4b1...| 2020/01/02|rider-1577787297889|driver-1577787297889| 98.88075495133515|
|
||||
// ...
|
||||
}
|
||||
|
||||
/**
|
||||
* This is similar to inserting new data. Generate updates to existing trips using the data generator,
|
||||
* load them into a DataFrame and write the DataFrame into the Hudi dataset.
|
||||
*/
|
||||
public static void updateData(SparkSession spark, JavaSparkContext jsc, String tablePath, String tableName,
|
||||
HoodieExampleDataGenerator<HoodieAvroPayload> dataGen) {
|
||||
|
||||
String commitTime = Long.toString(System.currentTimeMillis());
|
||||
List<String> updates = dataGen.convertToStringList(dataGen.generateUpdates(commitTime, 10));
|
||||
Dataset<Row> df = spark.read().json(jsc.parallelize(updates, 1));
|
||||
df.write().format("org.apache.hudi").
|
||||
options(QuickstartUtils.getQuickstartWriteConfigs()).
|
||||
option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts").
|
||||
option(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid").
|
||||
option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "partitionpath").
|
||||
option(TBL_NAME.key(), tableName).
|
||||
mode(Append).
|
||||
save(tablePath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete data based on record information read from the table.
|
||||
*/
|
||||
public static void delete(SparkSession spark, String tablePath, String tableName) {
|
||||
|
||||
Dataset<Row> roViewDF = spark.read().format("org.apache.hudi").load(tablePath + "/*/*/*/*");
|
||||
roViewDF.createOrReplaceTempView("hudi_ro_table");
|
||||
Dataset<Row> df = spark.sql("select uuid, partitionpath, ts from hudi_ro_table limit 2");
|
||||
|
||||
df.write().format("org.apache.hudi").
|
||||
options(QuickstartUtils.getQuickstartWriteConfigs()).
|
||||
option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts").
|
||||
option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "uuid").
|
||||
option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "partitionpath").
|
||||
option(TBL_NAME.key(), tableName).
|
||||
option("hoodie.datasource.write.operation", WriteOperationType.DELETE.value()).
|
||||
mode(Append).
|
||||
save(tablePath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete the data of a single or multiple partitions.
|
||||
*/
|
||||
public static void deleteByPartition(SparkSession spark, String tablePath, String tableName) {
|
||||
Dataset<Row> df = spark.emptyDataFrame();
|
||||
df.write().format("org.apache.hudi").
|
||||
options(QuickstartUtils.getQuickstartWriteConfigs()).
|
||||
option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts").
|
||||
option(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid").
|
||||
option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "partitionpath").
|
||||
option(TBL_NAME.key(), tableName).
|
||||
option("hoodie.datasource.write.operation", WriteOperationType.DELETE.value()).
|
||||
option("hoodie.datasource.write.partitions.to.delete",
|
||||
ArrayUtils.toString(HoodieExampleDataGenerator.DEFAULT_PARTITION_PATHS, ",")).
|
||||
mode(Append).
|
||||
save(tablePath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Hudi also provides the capability to obtain a stream of records that changed since a given commit timestamp.
|
||||
* This can be achieved using Hudi’s incremental view and providing a begin time from which changes need to be streamed.
|
||||
* We do not need to specify endTime if we want all changes after the given commit (as is the common case).
|
||||
*/
|
||||
public static void incrementalQuery(SparkSession spark, String tablePath, String tableName) {
|
||||
List<String> commits =
|
||||
spark.sql("select distinct(_hoodie_commit_time) as commitTime from hudi_ro_table order by commitTime")
|
||||
.toJavaRDD()
|
||||
.map((Function<Row, String>) row -> row.getString(0))
|
||||
.take(50);
|
||||
|
||||
String beginTime = commits.get(commits.size() - 2); // commit time we are interested in
|
||||
|
||||
// incrementally query data
|
||||
Dataset<Row> incViewDF = spark.
|
||||
read().
|
||||
format("org.apache.hudi").
|
||||
option("hoodie.datasource.query.type", "incremental").
|
||||
option("hoodie.datasource.read.begin.instanttime", beginTime).
|
||||
load(tablePath);
|
||||
|
||||
incViewDF.createOrReplaceTempView("hudi_incr_table");
|
||||
spark.sql("select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_incr_table where fare > 20.0")
|
||||
.show();
|
||||
}
|
||||
|
||||
/**
|
||||
* Let's look at how to query data as of a specific time.
|
||||
* The specific time can be represented by pointing endTime to a specific commit time
|
||||
* and beginTime to “000” (denoting earliest possible commit time).
|
||||
*/
|
||||
public static void pointInTimeQuery(SparkSession spark, String tablePath, String tableName) {
|
||||
List<String> commits =
|
||||
spark.sql("select distinct(_hoodie_commit_time) as commitTime from hudi_ro_table order by commitTime")
|
||||
.toJavaRDD()
|
||||
.map((Function<Row, String>) row -> row.getString(0))
|
||||
.take(50);
|
||||
String beginTime = "000"; // Represents all commits > this time.
|
||||
String endTime = commits.get(commits.size() - 2); // commit time we are interested in
|
||||
|
||||
//incrementally query data
|
||||
Dataset<Row> incViewDF = spark.read().format("org.apache.hudi").
|
||||
option("hoodie.datasource.query.type", "incremental").
|
||||
option("hoodie.datasource.read.begin.instanttime", beginTime).
|
||||
option("hoodie.datasource.read.end.instanttime", endTime).
|
||||
load(tablePath);
|
||||
|
||||
incViewDF.createOrReplaceTempView("hudi_incr_table");
|
||||
spark.sql("select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_incr_table where fare > 20.0")
|
||||
.show();
|
||||
}
|
||||
}
|
||||
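As a hedged sketch, the example above could be driven end to end from a small main class like the one below; the class name, table path, and table name are placeholder values for illustration.

// Hypothetical driver: runs insert, update, query, incremental query,
// point-in-time query, delete and delete-by-partition in sequence.
public final class RunSparkQuickstartSketch {
  public static void main(String[] args) {
    HoodieSparkQuickstart.main(new String[] {
        "file:///tmp/hoodie/quickstart_table",  // placeholder table path
        "hoodie_quickstart_table"               // placeholder table name
    });
  }
}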
@@ -0,0 +1,249 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.examples.quickstart;
|
||||
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.model.HoodieAvroRecord;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.spark.sql.Row;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Class to be used in the quickstart guide for generating inserts and updates against a corpus. Test data uses a toy Uber
|
||||
* trips data model.
|
||||
*/
|
||||
public class QuickstartUtils {
|
||||
|
||||
  public static class DataGenerator {
    private static final String DEFAULT_FIRST_PARTITION_PATH = "americas/united_states/san_francisco";
    private static final String DEFAULT_SECOND_PARTITION_PATH = "americas/brazil/sao_paulo";
    private static final String DEFAULT_THIRD_PARTITION_PATH = "asia/india/chennai";

    private static final String[] DEFAULT_PARTITION_PATHS =
        {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH, DEFAULT_THIRD_PARTITION_PATH};
    static String TRIP_EXAMPLE_SCHEMA = "{\"type\": \"record\",\"name\": \"triprec\",\"fields\": [ "
        + "{\"name\": \"ts\",\"type\": \"long\"},{\"name\": \"uuid\", \"type\": \"string\"},"
        + "{\"name\": \"rider\", \"type\": \"string\"},{\"name\": \"driver\", \"type\": \"string\"},"
        + "{\"name\": \"begin_lat\", \"type\": \"double\"},{\"name\": \"begin_lon\", \"type\": \"double\"},"
        + "{\"name\": \"end_lat\", \"type\": \"double\"},{\"name\": \"end_lon\", \"type\": \"double\"},"
        + "{\"name\":\"fare\",\"type\": \"double\"}]}";
    static Schema avroSchema = new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA);

    private static Random rand = new Random(46474747);

    private final Map<Integer, HoodieKey> existingKeys;
    private final String[] partitionPaths;
    private int numExistingKeys;

    public DataGenerator() {
      this(DEFAULT_PARTITION_PATHS, new HashMap<>());
    }

    public DataGenerator(String[] partitionPaths) {
      this(partitionPaths, new HashMap<>());
    }

    private DataGenerator(String[] partitionPaths, Map<Integer, HoodieKey> keyPartitionMap) {
      this.partitionPaths = Arrays.copyOf(partitionPaths, partitionPaths.length);
      this.existingKeys = keyPartitionMap;
    }

    private static String generateRandomString() {
      int leftLimit = 48; // ascii for 0
      int rightLimit = 57; // ascii for 9
      int stringLength = 3;
      StringBuilder buffer = new StringBuilder(stringLength);
      for (int i = 0; i < stringLength; i++) {
        int randomLimitedInt = leftLimit + (int) (rand.nextFloat() * (rightLimit - leftLimit + 1));
        buffer.append((char) randomLimitedInt);
      }
      return buffer.toString();
    }

    public int getNumExistingKeys() {
      return numExistingKeys;
    }

    public static GenericRecord generateGenericRecord(String rowKey, String riderName, String driverName,
                                                      long timestamp) {
      GenericRecord rec = new GenericData.Record(avroSchema);
      rec.put("uuid", rowKey);
      rec.put("ts", timestamp);
      rec.put("rider", riderName);
      rec.put("driver", driverName);
      rec.put("begin_lat", rand.nextDouble());
      rec.put("begin_lon", rand.nextDouble());
      rec.put("end_lat", rand.nextDouble());
      rec.put("end_lon", rand.nextDouble());
      rec.put("fare", rand.nextDouble() * 100);
      return rec;
    }

    /**
     * Generates a new avro record of the above schema format, retaining the key if optionally provided. The
     * riderDriverSuffix string is a random String used to simulate updates by changing the rider and driver fields
     * for records belonging to the same commit. It is purely for demo purposes. In the real world, the actual
     * updates are assumed to be provided based on the application requirements.
     */
    public static OverwriteWithLatestAvroPayload generateRandomValue(HoodieKey key, String riderDriverSuffix)
        throws IOException {
      // The generated timestamp is limited to the range from 7 days ago to now, to avoid generating too many
      // partitionPaths when users use the timestamp as the partitionPath field.
      GenericRecord rec =
          generateGenericRecord(key.getRecordKey(), "rider-" + riderDriverSuffix, "driver-"
              + riderDriverSuffix, generateRangeRandomTimestamp(7));
      return new OverwriteWithLatestAvroPayload(Option.of(rec));
    }

    /**
     * Generates a random timestamp between {@code daysTillNow} days ago and now.
     */
    private static long generateRangeRandomTimestamp(int daysTillNow) {
      long maxIntervalMillis = daysTillNow * 24 * 60 * 60 * 1000L;
      return System.currentTimeMillis() - (long) (Math.random() * maxIntervalMillis);
    }

    /**
     * Generates new inserts, uniformly across the partition paths above. It also updates the list of existing keys.
     */
    public Stream<HoodieRecord> generateInsertsStream(String randomString, Integer n) {
      int currSize = getNumExistingKeys();

      return IntStream.range(0, n).boxed().map(i -> {
        String partitionPath = partitionPaths[rand.nextInt(partitionPaths.length)];
        HoodieKey key = new HoodieKey(UUID.randomUUID().toString(), partitionPath);
        existingKeys.put(currSize + i, key);
        numExistingKeys++;
        try {
          return new HoodieAvroRecord(key, generateRandomValue(key, randomString));
        } catch (IOException e) {
          throw new HoodieIOException(e.getMessage(), e);
        }
      });
    }

    /**
     * Generates new inserts, uniformly across the partition paths above. It also updates the list of existing keys.
     */
    public List<HoodieRecord> generateInserts(Integer n) throws IOException {
      String randomString = generateRandomString();
      return generateInsertsStream(randomString, n).collect(Collectors.toList());
    }

    public HoodieRecord generateUpdateRecord(HoodieKey key, String randomString) throws IOException {
      return new HoodieAvroRecord(key, generateRandomValue(key, randomString));
    }

    /**
     * Generates new updates, randomly distributed across the keys above. There can be duplicates within the returned
     * list.
     *
     * @param n Number of updates (including duplicates)
     * @return list of hoodie record updates
     */
    public List<HoodieRecord> generateUpdates(Integer n) {
      if (numExistingKeys == 0) {
        throw new HoodieException("Data must have been written before performing the update operation");
      }
      String randomString = generateRandomString();
      return IntStream.range(0, n).boxed().map(x -> {
        try {
          return generateUpdateRecord(existingKeys.get(rand.nextInt(numExistingKeys)), randomString);
        } catch (IOException e) {
          throw new HoodieIOException(e.getMessage(), e);
        }
      }).collect(Collectors.toList());
    }

    /**
     * Generates delete records for the passed-in rows.
     *
     * @param rows List of {@link Row}s for which delete records need to be generated
     * @return list of hoodie records to delete
     */
    public List<String> generateDeletes(List<Row> rows) {
      // if row.length() == 2, the record contains only the "uuid" and "partitionpath" fields;
      // otherwise the additional "ts" field is available
      return rows.stream().map(row -> row.length() == 2
          ? convertToString(row.getAs("uuid"), row.getAs("partitionpath"), null) :
          convertToString(row.getAs("uuid"), row.getAs("partitionpath"), row.getAs("ts"))
      ).filter(os -> os.isPresent()).map(os -> os.get())
          .collect(Collectors.toList());
    }

    public void close() {
      existingKeys.clear();
    }
  }

  private static Option<String> convertToString(HoodieRecord record) {
    try {
      String str = HoodieAvroUtils
          .bytesToAvro(((OverwriteWithLatestAvroPayload) record.getData()).recordBytes, DataGenerator.avroSchema)
          .toString();
      str = "{" + str.substring(str.indexOf("\"ts\":"));
      return Option.of(str.replaceAll("}", ", \"partitionpath\": \"" + record.getPartitionPath() + "\"}"));
    } catch (IOException e) {
      return Option.empty();
    }
  }

  private static Option<String> convertToString(String uuid, String partitionPath, Long ts) {
    StringBuffer stringBuffer = new StringBuffer();
    stringBuffer.append("{");
    stringBuffer.append("\"ts\": \"" + (ts == null ? "0.0" : ts) + "\",");
    stringBuffer.append("\"uuid\": \"" + uuid + "\",");
    stringBuffer.append("\"partitionpath\": \"" + partitionPath + "\"");
    stringBuffer.append("}");
    return Option.of(stringBuffer.toString());
  }

  public static List<String> convertToStringList(List<HoodieRecord> records) {
    return records.stream().map(hr -> convertToString(hr)).filter(os -> os.isPresent()).map(os -> os.get())
        .collect(Collectors.toList());
  }

  public static Map<String, String> getQuickstartWriteConfigs() {
    Map<String, String> demoConfigs = new HashMap<>();
    demoConfigs.put("hoodie.insert.shuffle.parallelism", "2");
    demoConfigs.put("hoodie.upsert.shuffle.parallelism", "2");
    demoConfigs.put("hoodie.bulkinsert.shuffle.parallelism", "2");
    demoConfigs.put("hoodie.delete.shuffle.parallelism", "2");
    return demoConfigs;
  }
}
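For orientation, here is a minimal sketch (not part of this commit) of how these helpers are typically wired together: DataGenerator produces demo records, convertToStringList turns them into JSON strings, and getQuickstartWriteConfigs supplies the demo parallelism settings. The driver class name and method are hypothetical, and the string-keyed options used are the standard Hudi datasource write options.

import java.io.IOException;
import java.util.List;

import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.examples.quickstart.QuickstartUtils;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class QuickstartUtilsUsageSketch {

  // Hypothetical driver, for illustration only: generate demo trips, convert them to JSON,
  // and write them to a Hudi table using the demo parallelism settings above.
  public static void writeDemoTable(SparkSession spark, String tablePath, String tableName) throws IOException {
    QuickstartUtils.DataGenerator dataGen = new QuickstartUtils.DataGenerator();
    try {
      List<HoodieRecord> inserts = dataGen.generateInserts(10);
      List<String> json = QuickstartUtils.convertToStringList(inserts);
      Dataset<Row> df = spark.read().json(spark.createDataset(json, Encoders.STRING()));

      df.write().format("org.apache.hudi")
          .options(QuickstartUtils.getQuickstartWriteConfigs())
          // standard Hudi datasource write options; the field names match the generated records
          .option("hoodie.datasource.write.recordkey.field", "uuid")
          .option("hoodie.datasource.write.partitionpath.field", "partitionpath")
          .option("hoodie.datasource.write.precombine.field", "ts")
          .option("hoodie.table.name", tableName)
          .mode(SaveMode.Overwrite)
          .save(tablePath);
    } finally {
      dataGen.close();
    }
  }
}

The record key, partition path, and precombine fields mirror the uuid, partitionpath, and ts fields emitted by convertToString above.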
@@ -20,7 +20,7 @@ package org.apache.hudi.examples.spark

import org.apache.hudi.DataSourceReadOptions.{BEGIN_INSTANTTIME, END_INSTANTTIME, QUERY_TYPE, QUERY_TYPE_INCREMENTAL_OPT_VAL}
import org.apache.hudi.DataSourceWriteOptions.{PARTITIONPATH_FIELD, PRECOMBINE_FIELD, RECORDKEY_FIELD, PARTITIONS_TO_DELETE, OPERATION, DELETE_PARTITION_OPERATION_OPT_VAL, DELETE_OPERATION_OPT_VAL}
import org.apache.hudi.QuickstartUtils.getQuickstartWriteConfigs
import org.apache.hudi.examples.quickstart.QuickstartUtils.getQuickstartWriteConfigs
import org.apache.hudi.common.model.HoodieAvroPayload
import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
import org.apache.hudi.examples.common.{HoodieExampleDataGenerator, HoodieExampleSparkUtils}
@@ -172,7 +172,7 @@ object HoodieDataSourceExample {
   * This can be achieved using Hudi’s incremental view and providing a begin time from which changes need to be streamed.
   * We do not need to specify endTime, if we want all changes after the given commit (as is the common case).
   */
  def incrementalQuery(spark: SparkSession, tablePath: String, tableName: String) {
  def incrementalQuery(spark: SparkSession, tablePath: String, tableName: String): Unit = {
    import spark.implicits._
    val commits = spark.sql("select distinct(_hoodie_commit_time) as commitTime from hudi_ro_table order by commitTime").map(k => k.getString(0)).take(50)
    val beginTime = commits(commits.length - 2) // commit time we are interested in
@@ -193,7 +193,7 @@ object HoodieDataSourceExample {
   * The specific time can be represented by pointing endTime to a specific commit time
   * and beginTime to “000” (denoting earliest possible commit time).
   */
  def pointInTimeQuery(spark: SparkSession, tablePath: String, tableName: String) {
  def pointInTimeQuery(spark: SparkSession, tablePath: String, tableName: String): Unit = {
    import spark.implicits._
    val commits = spark.sql("select distinct(_hoodie_commit_time) as commitTime from hudi_ro_table order by commitTime").map(k => k.getString(0)).take(50)
    val beginTime = "000" // Represents all commits > this time.
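The scaladoc above notes that endTime can be left out when every change after the given commit is wanted. As a hedged sketch (not part of this commit; the helper name is hypothetical and it simply reuses the string-keyed datasource options shown in the Java example earlier in this change), an open-ended incremental read could look like:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class IncrementalReadSketch {

  // Hypothetical helper: read every commit made strictly after beginTime.
  // Omitting hoodie.datasource.read.end.instanttime lets the query run up to the
  // latest commit on the table, which the scaladoc describes as the common case.
  public static Dataset<Row> readSince(SparkSession spark, String tablePath, String beginTime) {
    return spark.read().format("org.apache.hudi")
        .option("hoodie.datasource.query.type", "incremental")
        .option("hoodie.datasource.read.begin.instanttime", beginTime)
        .load(tablePath);
  }
}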
@@ -20,7 +20,7 @@
package org.apache.hudi.examples.spark

import org.apache.hudi.DataSourceWriteOptions.{PARTITIONPATH_FIELD, PRECOMBINE_FIELD, RECORDKEY_FIELD, TABLE_TYPE}
import org.apache.hudi.QuickstartUtils.getQuickstartWriteConfigs
import org.apache.hudi.examples.quickstart.QuickstartUtils.getQuickstartWriteConfigs
import org.apache.hudi.client.SparkRDDWriteClient
import org.apache.hudi.client.common.HoodieSparkEngineContext
import org.apache.hudi.common.model.{HoodieAvroPayload, HoodieRecordPayload, HoodieTableType}
@@ -55,6 +55,7 @@ object HoodieMorCompactionJob {
    val dataGen = new HoodieExampleDataGenerator[HoodieAvroPayload]
    val tablePath = args(0)
    val tableName = args(1)

    insertData(spark, tablePath, tableName, dataGen, HoodieTableType.MERGE_ON_READ.name())
    updateData(spark, tablePath, tableName, dataGen, HoodieTableType.MERGE_ON_READ.name())
    val cfg = HoodieWriteConfig.newBuilder()
@@ -0,0 +1,115 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.examples.quickstart;

import java.io.File;
import java.nio.file.Paths;
import org.apache.hudi.client.HoodieReadClient;
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.examples.common.HoodieExampleDataGenerator;
import org.apache.hudi.testutils.providers.SparkProvider;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.Utils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

public class TestHoodieSparkQuickstart implements SparkProvider {
  protected static transient HoodieSparkEngineContext context;

  private static transient SparkSession spark;
  private static transient SQLContext sqlContext;
  private static transient JavaSparkContext jsc;

  /**
   * An indicator of the initialization status.
   */
  protected boolean initialized = false;
  @TempDir
  protected java.nio.file.Path tempDir;

  private static final HoodieExampleDataGenerator<HoodieAvroPayload> dataGen = new HoodieExampleDataGenerator<>();

  @Override
  public SparkSession spark() {
    return spark;
  }

  @Override
  public SQLContext sqlContext() {
    return sqlContext;
  }

  @Override
  public JavaSparkContext jsc() {
    return jsc;
  }

  @Override
  public HoodieSparkEngineContext context() {
    return context;
  }

  public String basePath() {
    return tempDir.toAbsolutePath().toString();
  }

  public String tablePath(String tableName) {
    return Paths.get(basePath(), tableName).toString();
  }

  @BeforeEach
  public synchronized void runBeforeEach() {
    initialized = spark != null;
    if (!initialized) {
      SparkConf sparkConf = conf();
      SparkRDDWriteClient.registerClasses(sparkConf);
      HoodieReadClient.addHoodieSupport(sparkConf);
      spark = SparkSession.builder().config(sparkConf).getOrCreate();
      sqlContext = spark.sqlContext();
      jsc = new JavaSparkContext(spark.sparkContext());
      context = new HoodieSparkEngineContext(jsc);
    }
  }

  @Test
  public void testHoodieSparkQuickstart() {
    String tableName = "spark_quick_start";
    String tablePath = tablePath(tableName);

    try {
      HoodieSparkQuickstart.insertData(spark, jsc, tablePath, tableName, dataGen);
      HoodieSparkQuickstart.updateData(spark, jsc, tablePath, tableName, dataGen);

      HoodieSparkQuickstart.queryData(spark, jsc, tablePath, tableName, dataGen);
      HoodieSparkQuickstart.incrementalQuery(spark, tablePath, tableName);
      HoodieSparkQuickstart.pointInTimeQuery(spark, tablePath, tableName);

      HoodieSparkQuickstart.delete(spark, tablePath, tableName);
      HoodieSparkQuickstart.deleteByPartition(spark, tablePath, tableName);
    } finally {
      Utils.deleteRecursively(new File(tablePath));
    }
  }
}
@@ -0,0 +1,45 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.examples.quickstart;

import org.apache.hudi.exception.HoodieException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;

import static org.junit.jupiter.api.Assertions.assertEquals;

@ExtendWith(MockitoExtension.class)
public class TestQuickstartUtils {

  @Test
  public void testGenerateUpdates() throws Exception {
    QuickstartUtils.DataGenerator dataGenerator = new QuickstartUtils.DataGenerator();

    // Calling generateUpdates before any data has been written throws HoodieException
    assertEquals("Data must have been written before performing the update operation",
        Assertions.assertThrows(HoodieException.class, () -> {
          dataGenerator.generateUpdates(10);
        }).getMessage());

    // Once generateInserts has been executed, generateUpdates succeeds without an exception
    assertEquals(10, dataGenerator.generateInserts(10).size());
    assertEquals(10, dataGenerator.generateUpdates(10).size());
  }
}
@@ -0,0 +1,30 @@
###
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###
log4j.rootLogger=WARN, CONSOLE
log4j.logger.org.apache.hudi=DEBUG
log4j.logger.org.apache.hadoop.hbase=ERROR

# CONSOLE is set to be a ConsoleAppender.
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
# CONSOLE uses PatternLayout.
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c %x - %m%n
log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
log4j.appender.CONSOLE.filter.a.LevelMin=WARN
log4j.appender.CONSOLE.filter.a.LevelMax=FATAL
@@ -0,0 +1,31 @@
###
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###
log4j.rootLogger=INFO, CONSOLE
log4j.logger.org.apache=INFO
log4j.logger.org.apache.hudi=DEBUG
log4j.logger.org.apache.hadoop.hbase=ERROR

# CONSOLE is set to be a ConsoleAppender.
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
# CONSOLE uses PatternLayout.
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
log4j.appender.CONSOLE.filter.a.LevelMin=INFO
log4j.appender.CONSOLE.filter.a.LevelMax=FATAL
@@ -25,204 +25,13 @@
  <modelVersion>4.0.0</modelVersion>

  <artifactId>hudi-examples</artifactId>
  <packaging>jar</packaging>
  <packaging>pom</packaging>

  <properties>
    <main.basedir>${project.parent.basedir}</main.basedir>
    <checkstyle.skip>true</checkstyle.skip>
  </properties>
  <modules>
    <module>hudi-examples-common</module>
    <module>hudi-examples-spark</module>
    <module>hudi-examples-flink</module>
    <module>hudi-examples-java</module>
  </modules>

  <build>
    <resources>
      <resource>
        <directory>src/main/resources</directory>
      </resource>
    </resources>

    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-dependency-plugin</artifactId>
        <executions>
          <execution>
            <id>copy-dependencies</id>
            <phase>prepare-package</phase>
            <goals>
              <goal>copy-dependencies</goal>
            </goals>
            <configuration>
              <outputDirectory>${project.build.directory}/lib</outputDirectory>
              <overWriteReleases>true</overWriteReleases>
              <overWriteSnapshots>true</overWriteSnapshots>
              <overWriteIfNewer>true</overWriteIfNewer>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <executions>
          <execution>
            <id>scala-compile-first</id>
            <phase>process-resources</phase>
            <goals>
              <goal>add-source</goal>
              <goal>compile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <executions>
          <execution>
            <phase>compile</phase>
            <goals>
              <goal>compile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>test-jar</goal>
            </goals>
            <phase>test-compile</phase>
          </execution>
        </executions>
        <configuration>
          <skip>false</skip>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.rat</groupId>
        <artifactId>apache-rat-plugin</artifactId>
      </plugin>
    </plugins>
  </build>

  <dependencies>

    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hudi</groupId>
      <artifactId>hudi-common</artifactId>
      <version>${project.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hudi</groupId>
      <artifactId>hudi-cli</artifactId>
      <version>${project.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hudi</groupId>
      <artifactId>hudi-client-common</artifactId>
      <version>${project.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hudi</groupId>
      <artifactId>hudi-java-client</artifactId>
      <version>${project.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hudi</groupId>
      <artifactId>hudi-spark-client</artifactId>
      <version>${project.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hudi</groupId>
      <artifactId>hudi-utilities_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hudi</groupId>
      <artifactId>hudi-spark_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hudi</groupId>
      <artifactId>hudi-hadoop-mr</artifactId>
      <version>${project.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hudi</groupId>
      <artifactId>hudi-timeline-service</artifactId>
      <version>${project.version}</version>
    </dependency>

    <!-- Spark -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${scala.binary.version}</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_${scala.binary.version}</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-avro_${scala.binary.version}</artifactId>
    </dependency>

    <!-- Parquet -->
    <dependency>
      <groupId>org.apache.parquet</groupId>
      <artifactId>parquet-hadoop</artifactId>
      <version>${parquet.version}</version>
    </dependency>

    <!-- Avro -->
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro</artifactId>
    </dependency>

    <dependency>
      <groupId>org.apache.parquet</groupId>
      <artifactId>parquet-avro</artifactId>
    </dependency>

    <!-- Hive -->
    <dependency>
      <groupId>${hive.groupid}</groupId>
      <artifactId>hive-common</artifactId>
    </dependency>
    <dependency>
      <groupId>${hive.groupid}</groupId>
      <artifactId>hive-exec</artifactId>
      <version>${hive.version}</version>
      <scope>provided</scope>
      <classifier>${hive.exec.classifier}</classifier>
      <exclusions>
        <exclusion>
          <groupId>javax.mail</groupId>
          <artifactId>mail</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.eclipse.jetty.aggregate</groupId>
          <artifactId>*</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

  </dependencies>
</project>