
[HUDI-2209] Bulk insert for flink writer (#3334)

This commit is contained in:
Danny Chan
2021-07-27 10:58:23 +08:00
committed by GitHub
parent 024cf01f02
commit 9d2a65a6a6
26 changed files with 2000 additions and 83 deletions

View File

@@ -52,6 +52,30 @@
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-common</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-parquet_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-avro</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- Parquet -->
<dependency>

View File

@@ -0,0 +1,184 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.client.model;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.flink.table.data.ArrayData;
import org.apache.flink.table.data.DecimalData;
import org.apache.flink.table.data.MapData;
import org.apache.flink.table.data.RawValueData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.TimestampData;
import org.apache.flink.types.RowKind;
/**
 * RowData implementation for Hoodie Row. It wraps a {@link RowData} that holds only the data columns and keeps
 * the meta column values locally. Queries for the meta column ordinals are intercepted and served from the local
 * copies; all other ordinals are delegated to the wrapped {@link RowData} with the meta column offset subtracted.
 */
public class HoodieRowData implements RowData {
private final String commitTime;
private final String commitSeqNumber;
private final String recordKey;
private final String partitionPath;
private final String fileName;
private final RowData row;
private final int metaColumnsNum;
public HoodieRowData(String commitTime,
String commitSeqNumber,
String recordKey,
String partitionPath,
String fileName,
RowData row) {
this.commitTime = commitTime;
this.commitSeqNumber = commitSeqNumber;
this.recordKey = recordKey;
this.partitionPath = partitionPath;
this.fileName = fileName;
this.row = row;
this.metaColumnsNum = HoodieRecord.HOODIE_META_COLUMNS.size();
}
@Override
public int getArity() {
return metaColumnsNum + row.getArity();
}
@Override
public RowKind getRowKind() {
return row.getRowKind();
}
@Override
public void setRowKind(RowKind kind) {
this.row.setRowKind(kind);
}
private String getMetaColumnVal(int ordinal) {
switch (ordinal) {
case 0: {
return commitTime;
}
case 1: {
return commitSeqNumber;
}
case 2: {
return recordKey;
}
case 3: {
return partitionPath;
}
case 4: {
return fileName;
}
default:
throw new IllegalArgumentException("Unexpected meta column ordinal: " + ordinal);
}
}
@Override
public boolean isNullAt(int ordinal) {
if (ordinal < metaColumnsNum) {
return null == getMetaColumnVal(ordinal);
}
return row.isNullAt(ordinal - metaColumnsNum);
}
@Override
public boolean getBoolean(int ordinal) {
return row.getBoolean(ordinal - metaColumnsNum);
}
@Override
public byte getByte(int ordinal) {
return row.getByte(ordinal - metaColumnsNum);
}
@Override
public short getShort(int ordinal) {
return row.getShort(ordinal - metaColumnsNum);
}
@Override
public int getInt(int ordinal) {
return row.getInt(ordinal - metaColumnsNum);
}
@Override
public long getLong(int ordinal) {
return row.getLong(ordinal - metaColumnsNum);
}
@Override
public float getFloat(int ordinal) {
return row.getFloat(ordinal - metaColumnsNum);
}
@Override
public double getDouble(int ordinal) {
return row.getDouble(ordinal - metaColumnsNum);
}
@Override
public DecimalData getDecimal(int ordinal, int precision, int scale) {
return row.getDecimal(ordinal - metaColumnsNum, precision, scale);
}
@Override
public TimestampData getTimestamp(int pos, int precision) {
return row.getTimestamp(pos - metaColumnsNum, precision);
}
@Override
public <T> RawValueData<T> getRawValue(int pos) {
return row.getRawValue(pos - metaColumnsNum);
}
@Override
public StringData getString(int ordinal) {
if (ordinal < metaColumnsNum) {
return StringData.fromString(getMetaColumnVal(ordinal));
}
return row.getString(ordinal - metaColumnsNum);
}
@Override
public byte[] getBinary(int ordinal) {
return row.getBinary(ordinal - metaColumnsNum);
}
@Override
public RowData getRow(int ordinal, int numFields) {
return row.getRow(ordinal - metaColumnsNum, numFields);
}
@Override
public ArrayData getArray(int ordinal) {
return row.getArray(ordinal - metaColumnsNum);
}
@Override
public MapData getMap(int ordinal) {
return row.getMap(ordinal - metaColumnsNum);
}
}
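
A minimal usage sketch of the ordinal shifting above; the demo class, schema, and literal meta values are illustrative assumptions, not part of this commit:

import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.hudi.client.model.HoodieRowData;

public class HoodieRowDataDemo {
  public static void main(String[] args) {
    // data columns only: (uuid STRING, age INT)
    RowData dataRow = GenericRowData.of(StringData.fromString("uuid-1"), 42);
    RowData hoodieRow = new HoodieRowData(
        "20210727105823",     // commit time
        "20210727105823_0_1", // commit seq number
        "uuid-1",             // record key
        "par1",               // partition path
        "f1.parquet",         // file name
        dataRow);
    System.out.println(hoodieRow.getArity());   // 7: five meta columns + two data columns
    System.out.println(hoodieRow.getString(2)); // uuid-1, served from the local meta copy
    System.out.println(hoodieRow.getInt(6));    // 42, delegated as dataRow.getInt(6 - 5)
  }
}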

View File

@@ -0,0 +1,205 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.io.storage.row;
import org.apache.hudi.client.HoodieInternalWriteStatus;
import org.apache.hudi.client.model.HoodieRowData;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.IOType;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieInsertException;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.MarkerFiles;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.io.Serializable;
import java.util.concurrent.atomic.AtomicLong;
/**
* Create handle with RowData for datasource implementation of bulk insert.
*/
public class HoodieRowDataCreateHandle implements Serializable {
private static final long serialVersionUID = 1L;
private static final Logger LOG = LogManager.getLogger(HoodieRowDataCreateHandle.class);
private static final AtomicLong SEQGEN = new AtomicLong(1);
private final String instantTime;
private final int taskPartitionId;
private final long taskId;
private final long taskEpochId;
private final HoodieTable table;
private final HoodieWriteConfig writeConfig;
protected final HoodieRowDataFileWriter fileWriter;
private final String partitionPath;
private final Path path;
private final String fileId;
private final FileSystem fs;
protected final HoodieInternalWriteStatus writeStatus;
private final HoodieTimer currTimer;
public HoodieRowDataCreateHandle(HoodieTable table, HoodieWriteConfig writeConfig, String partitionPath, String fileId,
String instantTime, int taskPartitionId, long taskId, long taskEpochId,
RowType rowType) {
this.partitionPath = partitionPath;
this.table = table;
this.writeConfig = writeConfig;
this.instantTime = instantTime;
this.taskPartitionId = taskPartitionId;
this.taskId = taskId;
this.taskEpochId = taskEpochId;
this.fileId = fileId;
this.currTimer = new HoodieTimer();
this.currTimer.startTimer();
this.fs = table.getMetaClient().getFs();
this.path = makeNewPath(partitionPath);
this.writeStatus = new HoodieInternalWriteStatus(!table.getIndex().isImplicitWithStorage(),
writeConfig.getWriteStatusFailureFraction());
writeStatus.setPartitionPath(partitionPath);
writeStatus.setFileId(fileId);
try {
HoodiePartitionMetadata partitionMetadata =
new HoodiePartitionMetadata(
fs,
instantTime,
new Path(writeConfig.getBasePath()),
FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath));
partitionMetadata.trySave(taskPartitionId);
createMarkerFile(partitionPath, FSUtils.makeDataFileName(this.instantTime, getWriteToken(), this.fileId, table.getBaseFileExtension()));
this.fileWriter = createNewFileWriter(path, table, writeConfig, rowType);
} catch (IOException e) {
throw new HoodieInsertException("Failed to initialize file writer for path " + path, e);
}
LOG.info("New handle created for partition: " + partitionPath + " with fileId " + fileId);
}
/**
 * Writes a {@link RowData} to the underlying {@link HoodieRowDataFileWriter}.
 * Before writing, the values for the meta columns are computed as required
 * and wrapped in {@link HoodieRowData}. {@link HoodieRowData} is what gets written to the HoodieRowDataFileWriter.
 *
 * @param recordKey The record key
 * @param partitionPath The partition path
 * @param record instance of {@link RowData} that needs to be written to the fileWriter.
 * @throws IOException
 */
public void write(String recordKey, String partitionPath, RowData record) throws IOException {
try {
String seqId = HoodieRecord.generateSequenceId(instantTime, taskPartitionId, SEQGEN.getAndIncrement());
HoodieRowData rowData = new HoodieRowData(instantTime, seqId, recordKey, partitionPath, path.getName(),
record);
try {
fileWriter.writeRow(recordKey, rowData);
writeStatus.markSuccess(recordKey);
} catch (Throwable t) {
writeStatus.markFailure(recordKey, t);
}
} catch (Throwable ge) {
writeStatus.setGlobalError(ge);
throw ge;
}
}
/**
 * @return {@code true} if this handle can take in more writes, else {@code false}.
 */
public boolean canWrite() {
return fileWriter.canWrite();
}
/**
* Closes the {@link HoodieRowDataCreateHandle} and returns an instance of {@link HoodieInternalWriteStatus} containing the stats and
* status of the writes to this handle.
*
* @return the {@link HoodieInternalWriteStatus} containing the stats and status of the writes to this handle.
* @throws IOException
*/
public HoodieInternalWriteStatus close() throws IOException {
fileWriter.close();
HoodieWriteStat stat = new HoodieWriteStat();
stat.setPartitionPath(partitionPath);
stat.setNumWrites(writeStatus.getTotalRecords());
stat.setNumDeletes(0);
stat.setNumInserts(writeStatus.getTotalRecords());
stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
stat.setFileId(fileId);
stat.setPath(new Path(writeConfig.getBasePath()), path);
long fileSizeInBytes = FSUtils.getFileSize(table.getMetaClient().getFs(), path);
stat.setTotalWriteBytes(fileSizeInBytes);
stat.setFileSizeInBytes(fileSizeInBytes);
stat.setTotalWriteErrors(writeStatus.getFailedRowsSize());
HoodieWriteStat.RuntimeStats runtimeStats = new HoodieWriteStat.RuntimeStats();
runtimeStats.setTotalCreateTime(currTimer.endTimer());
stat.setRuntimeStats(runtimeStats);
writeStatus.setStat(stat);
return writeStatus;
}
public String getFileName() {
return path.getName();
}
private Path makeNewPath(String partitionPath) {
Path path = FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath);
try {
if (!fs.exists(path)) {
fs.mkdirs(path); // create a new partition as needed.
}
} catch (IOException e) {
throw new HoodieIOException("Failed to make dir " + path, e);
}
HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
return new Path(path.toString(), FSUtils.makeDataFileName(instantTime, getWriteToken(), fileId,
tableConfig.getBaseFileFormat().getFileExtension()));
}
/**
* Creates an empty marker file corresponding to storage writer path.
*
* @param partitionPath Partition path
*/
private void createMarkerFile(String partitionPath, String dataFileName) {
MarkerFiles markerFiles = new MarkerFiles(table, instantTime);
markerFiles.create(partitionPath, dataFileName, IOType.CREATE);
}
private String getWriteToken() {
return taskPartitionId + "-" + taskId + "-" + taskEpochId;
}
protected HoodieRowDataFileWriter createNewFileWriter(
Path path, HoodieTable hoodieTable, HoodieWriteConfig config, RowType rowType)
throws IOException {
return HoodieRowDataFileWriterFactory.getRowDataFileWriter(
path, hoodieTable, config, rowType);
}
}
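
A hedged sketch of the handle lifecycle; the ids, the partition literal, and the writeOnePartition method are illustrative (see BulkInsertWriterHelper later in this commit for the actual call site):

import java.io.IOException;
import java.util.Iterator;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hudi.client.HoodieInternalWriteStatus;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.storage.row.HoodieRowDataCreateHandle;
import org.apache.hudi.sink.bulk.RowDataKeyGen;
import org.apache.hudi.table.HoodieTable;

static HoodieInternalWriteStatus writeOnePartition(
    HoodieTable table, HoodieWriteConfig writeConfig, RowType rowType,
    RowDataKeyGen keyGen, String instantTime, Iterator<RowData> rows) throws IOException {
  // rowType is expected to already include the 5 Hoodie metadata fields
  HoodieRowDataCreateHandle handle = new HoodieRowDataCreateHandle(
      table, writeConfig, "par1", "fileId-0", instantTime,
      /* taskPartitionId */ 0, /* taskId */ 0L, /* taskEpochId */ 0L, rowType);
  while (rows.hasNext() && handle.canWrite()) {
    RowData row = rows.next();
    // the handle wraps each row in HoodieRowData with the meta column values
    handle.write(keyGen.getRecordKey(row), "par1", row);
  }
  return handle.close(); // flushes the file and returns stats for the commit
}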

View File

@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.io.storage.row;
import org.apache.flink.table.data.RowData;
import java.io.IOException;
/**
* Abstraction for writing {@link RowData}s, used by the datasource implementation.
*/
public interface HoodieRowDataFileWriter {
/**
 * Returns {@code true} if this RowFileWriter can take in more writes, else {@code false}.
 */
boolean canWrite();
/**
* Writes a {@link RowData} to the {@link HoodieRowDataFileWriter}. Also takes in the associated record key to be added to the bloom filter if required.
*
* @throws IOException on any exception while writing.
*/
void writeRow(String key, RowData row) throws IOException;
/**
* Writes a {@link RowData} to the {@link HoodieRowDataFileWriter}.
*
* @throws IOException on any exception while writing.
*/
void writeRow(RowData row) throws IOException;
/**
* Closes the {@link HoodieRowDataFileWriter}; no further writes are accepted afterwards.
*/
void close() throws IOException;
}

View File

@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.io.storage.row;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.bloom.BloomFilterFactory;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import static org.apache.hudi.common.model.HoodieFileFormat.PARQUET;
/**
* Factory to assist in instantiating a new {@link HoodieRowDataFileWriter}.
*/
public class HoodieRowDataFileWriterFactory {
/**
* Factory method to assist in instantiating an instance of {@link HoodieRowDataFileWriter}.
*
* @param path path of the RowFileWriter.
* @param hoodieTable instance of {@link HoodieTable} in use.
* @param config instance of {@link HoodieWriteConfig} to use.
* @param schema schema of the dataset in use.
* @return the instantiated {@link HoodieRowDataFileWriter}.
* @throws IOException if the format is not supported or if any exception occurs while instantiating the RowFileWriter.
*/
public static HoodieRowDataFileWriter getRowDataFileWriter(
Path path, HoodieTable hoodieTable, HoodieWriteConfig config, RowType schema)
throws IOException {
final String extension = FSUtils.getFileExtension(path.getName());
if (PARQUET.getFileExtension().equals(extension)) {
return newParquetInternalRowFileWriter(path, config, schema, hoodieTable);
}
throw new UnsupportedOperationException(extension + " format not supported yet.");
}
private static HoodieRowDataFileWriter newParquetInternalRowFileWriter(
Path path, HoodieWriteConfig writeConfig, RowType rowType, HoodieTable table)
throws IOException {
BloomFilter filter = BloomFilterFactory.createBloomFilter(
writeConfig.getBloomFilterNumEntries(),
writeConfig.getBloomFilterFPP(),
writeConfig.getDynamicBloomFilterMaxNumEntries(),
writeConfig.getBloomFilterType());
HoodieRowDataParquetWriteSupport writeSupport =
new HoodieRowDataParquetWriteSupport(table.getHadoopConf(), rowType, filter);
return new HoodieRowDataParquetWriter(
path, new HoodieRowDataParquetConfig(
writeSupport,
writeConfig.getParquetCompressionCodec(),
writeConfig.getParquetBlockSize(),
writeConfig.getParquetPageSize(),
writeConfig.getParquetMaxFileSize(),
writeSupport.getHadoopConf(),
writeConfig.getParquetCompressionRatio()));
}
}

View File

@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.io.storage.row;
import org.apache.hudi.io.storage.HoodieBaseParquetConfig;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
/**
* ParquetConfig for datasource implementation with {@link org.apache.flink.table.data.RowData}.
*/
public class HoodieRowDataParquetConfig extends HoodieBaseParquetConfig<HoodieRowDataParquetWriteSupport> {
public HoodieRowDataParquetConfig(HoodieRowDataParquetWriteSupport writeSupport, CompressionCodecName compressionCodecName,
int blockSize, int pageSize, long maxFileSize, Configuration hadoopConf,
double compressionRatio) {
super(writeSupport, compressionCodecName, blockSize, pageSize, maxFileSize, hadoopConf, compressionRatio);
}
}

View File

@@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.io.storage.row;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.bloom.HoodieDynamicBoundedBloomFilter;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.api.WriteSupport;
import java.util.HashMap;
import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY;
import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_BLOOM_FILTER_TYPE_CODE;
import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER;
import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER;
/**
* Hoodie Write Support for directly writing {@link RowData} to Parquet.
*/
public class HoodieRowDataParquetWriteSupport extends RowDataParquetWriteSupport {
private final Configuration hadoopConf;
private final BloomFilter bloomFilter;
private String minRecordKey;
private String maxRecordKey;
public HoodieRowDataParquetWriteSupport(Configuration conf, RowType rowType, BloomFilter bloomFilter) {
super(rowType);
this.hadoopConf = new Configuration(conf);
this.bloomFilter = bloomFilter;
}
public Configuration getHadoopConf() {
return hadoopConf;
}
@Override
public WriteSupport.FinalizedWriteContext finalizeWrite() {
HashMap<String, String> extraMetaData = new HashMap<>();
if (bloomFilter != null) {
extraMetaData.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
if (minRecordKey != null && maxRecordKey != null) {
extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey);
extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey);
}
if (bloomFilter.getBloomFilterTypeCode().name().contains(HoodieDynamicBoundedBloomFilter.TYPE_CODE_PREFIX)) {
extraMetaData.put(HOODIE_BLOOM_FILTER_TYPE_CODE, bloomFilter.getBloomFilterTypeCode().name());
}
}
return new WriteSupport.FinalizedWriteContext(extraMetaData);
}
public void add(String recordKey) {
this.bloomFilter.add(recordKey);
if (minRecordKey != null) {
minRecordKey = minRecordKey.compareTo(recordKey) <= 0 ? minRecordKey : recordKey;
} else {
minRecordKey = recordKey;
}
if (maxRecordKey != null) {
maxRecordKey = maxRecordKey.compareTo(recordKey) >= 0 ? maxRecordKey : recordKey;
} else {
maxRecordKey = recordKey;
}
}
}
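
The add(..) method above is a running lexicographic min/max fold over the written record keys; finalizeWrite() then publishes them, together with the serialized bloom filter, as parquet footer metadata. A standalone restatement of the fold, for illustration only:

public class MinMaxKeyDemo {
  public static void main(String[] args) {
    String minKey = null;
    String maxKey = null;
    for (String key : new String[] {"key-3", "key-1", "key-2"}) {
      // same comparisons as HoodieRowDataParquetWriteSupport#add
      minKey = (minKey == null || key.compareTo(minKey) < 0) ? key : minKey;
      maxKey = (maxKey == null || key.compareTo(maxKey) > 0) ? key : maxKey;
    }
    System.out.println(minKey + " .. " + maxKey); // key-1 .. key-3
  }
}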

View File

@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.io.storage.row;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
import org.apache.flink.table.data.RowData;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import java.io.IOException;
/**
* Parquet implementation of {@link HoodieRowDataFileWriter} to write {@link RowData}s.
*/
public class HoodieRowDataParquetWriter extends ParquetWriter<RowData>
implements HoodieRowDataFileWriter {
private final Path file;
private final HoodieWrapperFileSystem fs;
private final long maxFileSize;
private final HoodieRowDataParquetWriteSupport writeSupport;
public HoodieRowDataParquetWriter(Path file, HoodieRowDataParquetConfig parquetConfig)
throws IOException {
super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()),
ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(), parquetConfig.getCompressionCodecName(),
parquetConfig.getBlockSize(), parquetConfig.getPageSize(), parquetConfig.getPageSize(),
DEFAULT_IS_DICTIONARY_ENABLED, DEFAULT_IS_VALIDATING_ENABLED,
DEFAULT_WRITER_VERSION, FSUtils.registerFileSystem(file, parquetConfig.getHadoopConf()));
this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf());
this.fs = (HoodieWrapperFileSystem) this.file.getFileSystem(FSUtils.registerFileSystem(file,
parquetConfig.getHadoopConf()));
this.maxFileSize = parquetConfig.getMaxFileSize()
+ Math.round(parquetConfig.getMaxFileSize() * parquetConfig.getCompressionRatio());
this.writeSupport = parquetConfig.getWriteSupport();
}
@Override
public boolean canWrite() {
return fs.getBytesWritten(file) < maxFileSize;
}
@Override
public void writeRow(String key, RowData row) throws IOException {
super.write(row);
writeSupport.add(key);
}
@Override
public void writeRow(RowData row) throws IOException {
super.write(row);
}
@Override
public void close() throws IOException {
super.close();
}
}
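
canWrite() rolls to a new file once the raw bytes written exceed the configured max file size inflated by the compression ratio. A worked example with assumed config values (not taken from this commit):

public class MaxFileSizeDemo {
  public static void main(String[] args) {
    long parquetMaxFileSize = 120L * 1024 * 1024; // assumed: 120 MiB
    double compressionRatio = 0.1;                // assumed ratio
    long maxFileSize = parquetMaxFileSize
        + Math.round(parquetMaxFileSize * compressionRatio);
    System.out.println(maxFileSize); // 138412032 bytes, i.e. ~132 MiB of raw writes
  }
}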

View File

@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.io.storage.row;
import org.apache.flink.formats.parquet.row.ParquetRowDataWriter;
import org.apache.flink.formats.parquet.utils.ParquetSchemaConverter;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.MessageType;
import java.util.HashMap;
/**
* Row data parquet write support.
*/
public class RowDataParquetWriteSupport extends WriteSupport<RowData> {
private final RowType rowType;
private final MessageType schema;
private ParquetRowDataWriter writer;
public RowDataParquetWriteSupport(RowType rowType) {
super();
this.rowType = rowType;
this.schema = ParquetSchemaConverter.convertToParquetMessageType("flink_schema", rowType);
}
@Override
public WriteContext init(Configuration configuration) {
return new WriteContext(schema, new HashMap<>());
}
@Override
public void prepareForWrite(RecordConsumer recordConsumer) {
// TODO: make the UTC timestamp flag configurable
this.writer = new ParquetRowDataWriter(recordConsumer, rowType, schema, true);
}
@Override
public void write(RowData record) {
try {
this.writer.write(record);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}

View File

@@ -339,5 +339,11 @@
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-csv</artifactId>
<version>${flink.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@@ -344,6 +344,22 @@ public class FlinkOptions extends HoodieConfig {
.withDescription("Timeout limit for a writer task after it finishes a checkpoint and\n"
+ "waits for the instant commit success, only for internal use");
public static final ConfigOption<Boolean> SINK_SHUFFLE_BY_PARTITION = ConfigOptions
.key("sink.shuffle-by-partition.enable")
.booleanType()
.defaultValue(false)
.withDescription(
"Whether to shuffle the data by the dynamic partition fields in the sink"
+ " phase; this can greatly reduce the number of files for the filesystem sink but may"
+ " lead to data skew.");
// this is only for internal use
public static final ConfigOption<Boolean> WRITE_BULK_INSERT_PARTITION_SORTED = ConfigOptions
.key("write.bulk_insert.partition.sorted")
.booleanType()
.defaultValue(false)
.withDescription("Whether the bulk insert write task input records are already sorted by the partition path");
// ------------------------------------------------------------------------
// Compaction Options
// ------------------------------------------------------------------------
@@ -581,7 +597,9 @@ public class FlinkOptions extends HoodieConfig {
return options.keySet().stream().anyMatch(k -> k.startsWith(PROPERTIES_PREFIX));
}
/**
* Creates a new configuration that is initialized with the options of the given map.
*/
public static Configuration fromMap(Map<String, String> map) {
final Configuration configuration = new Configuration();
map.forEach(configuration::setString);
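
A sketch of how the two new options might be set on a sink configuration; the operation value and the demo class are assumptions, while the option constants are the ones added above:

import org.apache.flink.configuration.Configuration;
import org.apache.hudi.configuration.FlinkOptions;

public class BulkInsertConfDemo {
  static Configuration bulkInsertConf() {
    Configuration conf = new Configuration();
    conf.setString(FlinkOptions.OPERATION, "bulk_insert");                  // assumed operation value
    conf.setBoolean(FlinkOptions.SINK_SHUFFLE_BY_PARTITION, true);          // shuffle input by partition fields
    conf.setBoolean(FlinkOptions.WRITE_BULK_INSERT_PARTITION_SORTED, true); // input pre-sorted by partition path
    return conf;
  }
}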

View File

@@ -34,6 +34,7 @@ import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.sink.event.CommitAckEvent;
import org.apache.hudi.sink.event.WriteMetadataEvent;
import org.apache.hudi.sink.utils.TimeWait;
import org.apache.hudi.table.action.commit.FlinkWriteHelper;
import org.apache.hudi.util.StreamerUtil;
@@ -61,7 +62,6 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Random;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
@@ -568,24 +568,17 @@ public class StreamWriteFunction<K, I, O>
String instant = this.writeClient.getLastPendingInstant(this.actionType);
// if exactly-once semantics turns on,
// waits for the checkpoint notification until the checkpoint timeout threshold hits.
TimeWait timeWait = TimeWait.builder()
.timeout(config.getLong(FlinkOptions.WRITE_COMMIT_ACK_TIMEOUT))
.action("instant initialize")
.build();
while (confirming) {
// wait condition:
// 1. there is no inflight instant
// 2. the inflight instant does not change and the checkpoint has buffering data
if (instant == null || (instant.equals(this.currentInstant) && hasData)) {
// sleep for a while
timeWait.waitFor();
// refresh the inflight instant
instant = this.writeClient.getLastPendingInstant(this.actionType);
} else {

View File

@@ -27,7 +27,6 @@ import org.apache.flink.streaming.api.operators.OneInputStreamOperatorFactory;
import org.apache.flink.streaming.api.operators.SimpleUdfStreamOperatorFactory;
import org.apache.flink.streaming.api.operators.StreamOperator;
import org.apache.flink.streaming.api.operators.StreamOperatorParameters;
/**
* Factory class for {@link StreamWriteOperator}.
@@ -63,9 +62,4 @@ public class StreamWriteOperatorFactory<I>
public OperatorCoordinator.Provider getCoordinatorProvider(String s, OperatorID operatorID) {
return new StreamWriteOperatorCoordinator.Provider(operatorID, this.conf);
}
}

View File

@@ -0,0 +1,225 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sink.bulk;
import org.apache.hudi.client.HoodieFlinkWriteClient;
import org.apache.hudi.client.HoodieInternalWriteStatus;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.CommitUtils;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.sink.StreamWriteOperatorCoordinator;
import org.apache.hudi.sink.event.WriteMetadataEvent;
import org.apache.hudi.sink.utils.TimeWait;
import org.apache.hudi.util.StreamerUtil;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.operators.coordination.OperatorEventGateway;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
* Sink function to write the data to the underlying filesystem.
*
* <p>The function should only be used in operation type {@link WriteOperationType#BULK_INSERT}.
*
* <p>Note: The function task requires the input stream to be shuffled by the partition path.
*
* @param <I> Type of the input record
* @see StreamWriteOperatorCoordinator
*/
public class BulkInsertWriteFunction<I, O>
extends ProcessFunction<I, O> {
private static final long serialVersionUID = 1L;
private static final Logger LOG = LoggerFactory.getLogger(BulkInsertWriteFunction.class);
/**
* Helper class for bulk insert mode.
*/
private transient BulkInsertWriterHelper writerHelper;
/**
* Config options.
*/
private final Configuration config;
/**
* Table row type.
*/
private final RowType rowType;
/**
* Id of current subtask.
*/
private int taskID;
/**
* Write Client.
*/
private transient HoodieFlinkWriteClient writeClient;
/**
* The initial inflight instant when start up.
*/
private volatile String initInstant;
/**
* Gateway to send operator events to the operator coordinator.
*/
private transient OperatorEventGateway eventGateway;
/**
* Commit action type.
*/
private transient String actionType;
/**
* Constructs a BulkInsertWriteFunction.
*
* @param config The config options
*/
public BulkInsertWriteFunction(Configuration config, RowType rowType) {
this.config = config;
this.rowType = rowType;
}
@Override
public void open(Configuration parameters) throws IOException {
this.taskID = getRuntimeContext().getIndexOfThisSubtask();
this.writeClient = StreamerUtil.createWriteClient(this.config, getRuntimeContext());
this.actionType = CommitUtils.getCommitActionType(
WriteOperationType.fromValue(config.getString(FlinkOptions.OPERATION)),
HoodieTableType.valueOf(config.getString(FlinkOptions.TABLE_TYPE)));
this.initInstant = this.writeClient.getLastPendingInstant(this.actionType);
sendBootstrapEvent();
initWriterHelper();
}
@Override
public void processElement(I value, Context ctx, Collector<O> out) throws IOException {
this.writerHelper.write((RowData) value);
}
@Override
public void close() {
if (this.writeClient != null) {
this.writeClient.cleanHandlesGracefully();
this.writeClient.close();
}
}
/**
* End input action for batch source.
*/
public void endInput() {
final List<WriteStatus> writeStatus;
try {
this.writerHelper.close();
writeStatus = this.writerHelper.getWriteStatuses().stream()
.map(BulkInsertWriteFunction::toWriteStatus).collect(Collectors.toList());
} catch (IOException e) {
throw new HoodieException("Error collecting the write status for task [" + this.taskID + "]", e);
}
final WriteMetadataEvent event = WriteMetadataEvent.builder()
.taskID(taskID)
.instantTime(this.writerHelper.getInstantTime())
.writeStatus(writeStatus)
.lastBatch(true)
.endInput(true)
.build();
this.eventGateway.sendEventToCoordinator(event);
}
/**
* Tool to convert {@link HoodieInternalWriteStatus} into {@link WriteStatus}.
*/
private static WriteStatus toWriteStatus(HoodieInternalWriteStatus internalWriteStatus) {
WriteStatus writeStatus = new WriteStatus(false, 0.1);
writeStatus.setStat(internalWriteStatus.getStat());
writeStatus.setFileId(internalWriteStatus.getFileId());
writeStatus.setGlobalError(internalWriteStatus.getGlobalError());
writeStatus.setTotalRecords(internalWriteStatus.getTotalRecords());
writeStatus.setTotalErrorRecords(internalWriteStatus.getTotalErrorRecords());
return writeStatus;
}
// -------------------------------------------------------------------------
// Getter/Setter
// -------------------------------------------------------------------------
public void setOperatorEventGateway(OperatorEventGateway operatorEventGateway) {
this.eventGateway = operatorEventGateway;
}
// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------
private void initWriterHelper() {
String instant = instantToWrite();
this.writerHelper = new BulkInsertWriterHelper(this.config, this.writeClient.getHoodieTable(), this.writeClient.getConfig(),
instant, this.taskID, getRuntimeContext().getNumberOfParallelSubtasks(), getRuntimeContext().getAttemptNumber(),
this.rowType);
}
private void sendBootstrapEvent() {
WriteMetadataEvent event = WriteMetadataEvent.builder()
.taskID(taskID)
.writeStatus(Collections.emptyList())
.instantTime("")
.bootstrap(true)
.build();
this.eventGateway.sendEventToCoordinator(event);
LOG.info("Send bootstrap write metadata event to coordinator, task[{}].", taskID);
}
private String instantToWrite() {
String instant = this.writeClient.getLastPendingInstant(this.actionType);
// if exactly-once semantics turns on,
// waits for the checkpoint notification until the checkpoint timeout threshold hits.
TimeWait timeWait = TimeWait.builder()
.timeout(config.getLong(FlinkOptions.WRITE_COMMIT_ACK_TIMEOUT))
.action("instant initialize")
.build();
while (instant == null || instant.equals(this.initInstant)) {
// wait condition:
// 1. there is no inflight instant
// 2. the inflight instant does not change
// sleep for a while
timeWait.waitFor();
// refresh the inflight instant
instant = this.writeClient.getLastPendingInstant(this.actionType);
}
return instant;
}
}

View File

@@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sink.bulk;
import org.apache.hudi.sink.StreamWriteOperatorCoordinator;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.runtime.operators.coordination.OperatorCoordinator;
import org.apache.flink.runtime.operators.coordination.OperatorEvent;
import org.apache.flink.runtime.operators.coordination.OperatorEventDispatcher;
import org.apache.flink.runtime.operators.coordination.OperatorEventGateway;
import org.apache.flink.runtime.operators.coordination.OperatorEventHandler;
import org.apache.flink.streaming.api.operators.BoundedOneInput;
import org.apache.flink.streaming.api.operators.CoordinatedOperatorFactory;
import org.apache.flink.streaming.api.operators.OneInputStreamOperatorFactory;
import org.apache.flink.streaming.api.operators.ProcessOperator;
import org.apache.flink.streaming.api.operators.SimpleUdfStreamOperatorFactory;
import org.apache.flink.streaming.api.operators.StreamOperator;
import org.apache.flink.streaming.api.operators.StreamOperatorParameters;
import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
/**
* Operator for bulk insert mode sink.
*
* @param <I> The input type
*/
public class BulkInsertWriteOperator<I>
extends ProcessOperator<I, Object>
implements OperatorEventHandler, BoundedOneInput {
private final BulkInsertWriteFunction<I, Object> sinkFunction;
public BulkInsertWriteOperator(Configuration conf, RowType rowType) {
super(new BulkInsertWriteFunction<>(conf, rowType));
this.sinkFunction = (BulkInsertWriteFunction<I, Object>) getUserFunction();
}
@Override
public void handleOperatorEvent(OperatorEvent event) {
// no operation
}
void setOperatorEventGateway(OperatorEventGateway operatorEventGateway) {
sinkFunction.setOperatorEventGateway(operatorEventGateway);
}
@Override
public void endInput() {
sinkFunction.endInput();
}
public static OperatorFactory<RowData> getFactory(Configuration conf, RowType rowType) {
return new OperatorFactory<>(conf, rowType);
}
// -------------------------------------------------------------------------
// Inner Class
// -------------------------------------------------------------------------
public static class OperatorFactory<I>
extends SimpleUdfStreamOperatorFactory<Object>
implements CoordinatedOperatorFactory<Object>, OneInputStreamOperatorFactory<I, Object> {
private static final long serialVersionUID = 1L;
private final BulkInsertWriteOperator<I> operator;
private final Configuration conf;
public OperatorFactory(Configuration conf, RowType rowType) {
super(new BulkInsertWriteOperator<>(conf, rowType));
this.operator = (BulkInsertWriteOperator<I>) getOperator();
this.conf = conf;
}
@Override
@SuppressWarnings("unchecked")
public <T extends StreamOperator<Object>> T createStreamOperator(StreamOperatorParameters<Object> parameters) {
final OperatorID operatorID = parameters.getStreamConfig().getOperatorID();
final OperatorEventDispatcher eventDispatcher = parameters.getOperatorEventDispatcher();
this.operator.setOperatorEventGateway(eventDispatcher.getOperatorEventGateway(operatorID));
this.operator.setup(parameters.getContainingTask(), parameters.getStreamConfig(), parameters.getOutput());
this.operator.setProcessingTimeService(this.processingTimeService);
eventDispatcher.registerEventHandler(operatorID, operator);
return (T) operator;
}
@Override
public OperatorCoordinator.Provider getCoordinatorProvider(String s, OperatorID operatorID) {
return new StreamWriteOperatorCoordinator.Provider(operatorID, this.conf);
}
@Override
public void setProcessingTimeService(ProcessingTimeService processingTimeService) {
super.setProcessingTimeService(processingTimeService);
}
}
}
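
A hedged sketch of wiring the operator into a RowData stream; the transform name and the caller-provided dataStream/conf/rowType are assumptions (this commit does not include the pipeline wiring in this file):

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hudi.sink.bulk.BulkInsertWriteOperator;

static DataStream<Object> bulkInsert(
    DataStream<RowData> dataStream, Configuration conf, RowType rowType) {
  // the coordinated factory registers StreamWriteOperatorCoordinator,
  // which commits the instant once the tasks report endInput
  return dataStream.transform(
      "bulk_insert_write", // illustrative operator name
      TypeInformation.of(Object.class),
      BulkInsertWriteOperator.getFactory(conf, rowType));
}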

View File

@@ -0,0 +1,169 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sink.bulk;
import org.apache.hudi.client.HoodieInternalWriteStatus;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.io.storage.row.HoodieRowDataCreateHandle;
import org.apache.hudi.table.HoodieTable;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
/**
* Helper class for bulk insert used by Flink.
*/
public class BulkInsertWriterHelper {
private static final Logger LOG = LogManager.getLogger(BulkInsertWriterHelper.class);
private final String instantTime;
private final int taskPartitionId;
private final long taskId;
private final long taskEpochId;
private final HoodieTable hoodieTable;
private final HoodieWriteConfig writeConfig;
private final RowType rowType;
private final boolean arePartitionRecordsSorted;
private final List<HoodieInternalWriteStatus> writeStatusList = new ArrayList<>();
private HoodieRowDataCreateHandle handle;
private String lastKnownPartitionPath = null;
private final String fileIdPrefix;
private int numFilesWritten = 0;
private final Map<String, HoodieRowDataCreateHandle> handles = new HashMap<>();
private final RowDataKeyGen keyGen;
public BulkInsertWriterHelper(Configuration conf, HoodieTable hoodieTable, HoodieWriteConfig writeConfig,
String instantTime, int taskPartitionId, long taskId, long taskEpochId, RowType rowType) {
this.hoodieTable = hoodieTable;
this.writeConfig = writeConfig;
this.instantTime = instantTime;
this.taskPartitionId = taskPartitionId;
this.taskId = taskId;
this.taskEpochId = taskEpochId;
this.rowType = addMetadataFields(rowType); // patch up with metadata fields
this.arePartitionRecordsSorted = conf.getBoolean(FlinkOptions.WRITE_BULK_INSERT_PARTITION_SORTED);
this.fileIdPrefix = UUID.randomUUID().toString();
this.keyGen = RowDataKeyGen.instance(conf, rowType);
}
/**
* Returns the write instant time.
*/
public String getInstantTime() {
return this.instantTime;
}
public void write(RowData record) throws IOException {
try {
String recordKey = keyGen.getRecordKey(record);
String partitionPath = keyGen.getPartitionPath(record);
if ((lastKnownPartitionPath == null) || !lastKnownPartitionPath.equals(partitionPath) || !handle.canWrite()) {
LOG.info("Creating new file for partition path " + partitionPath);
handle = getRowCreateHandle(partitionPath);
lastKnownPartitionPath = partitionPath;
}
handle.write(recordKey, partitionPath, record);
} catch (Throwable t) {
LOG.error("Global error thrown while trying to write records in HoodieRowDataCreateHandle", t);
throw t;
}
}
public List<HoodieInternalWriteStatus> getWriteStatuses() throws IOException {
close();
return writeStatusList;
}
private HoodieRowDataCreateHandle getRowCreateHandle(String partitionPath) throws IOException {
if (!handles.containsKey(partitionPath)) { // if there is no handle corresponding to the partition path
// if records are sorted, we can close all existing handles
if (arePartitionRecordsSorted) {
close();
}
HoodieRowDataCreateHandle rowCreateHandle = new HoodieRowDataCreateHandle(hoodieTable, writeConfig, partitionPath, getNextFileId(),
instantTime, taskPartitionId, taskId, taskEpochId, rowType);
handles.put(partitionPath, rowCreateHandle);
} else if (!handles.get(partitionPath).canWrite()) {
// even if there is a handle to the partition path, it could have reached its max size threshold. So, we close the handle here and
// create a new one.
writeStatusList.add(handles.remove(partitionPath).close());
HoodieRowDataCreateHandle rowCreateHandle = new HoodieRowDataCreateHandle(hoodieTable, writeConfig, partitionPath, getNextFileId(),
instantTime, taskPartitionId, taskId, taskEpochId, rowType);
handles.put(partitionPath, rowCreateHandle);
}
return handles.get(partitionPath);
}
public void close() throws IOException {
for (HoodieRowDataCreateHandle rowCreateHandle : handles.values()) {
writeStatusList.add(rowCreateHandle.close());
}
handles.clear();
handle = null;
}
private String getNextFileId() {
return String.format("%s-%d", fileIdPrefix, numFilesWritten++);
}
/**
* Adds the Hoodie metadata fields to the given row type.
*/
private static RowType addMetadataFields(RowType rowType) {
List<RowType.RowField> mergedFields = new ArrayList<>();
LogicalType metadataFieldType = DataTypes.STRING().getLogicalType();
RowType.RowField commitTimeField =
new RowType.RowField(HoodieRecord.COMMIT_TIME_METADATA_FIELD, metadataFieldType, "commit time");
RowType.RowField commitSeqnoField =
new RowType.RowField(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, metadataFieldType, "commit seqno");
RowType.RowField recordKeyField =
new RowType.RowField(HoodieRecord.RECORD_KEY_METADATA_FIELD, metadataFieldType, "record key");
RowType.RowField partitionPathField =
new RowType.RowField(HoodieRecord.PARTITION_PATH_METADATA_FIELD, metadataFieldType, "partition path");
RowType.RowField fileNameField =
new RowType.RowField(HoodieRecord.FILENAME_METADATA_FIELD, metadataFieldType, "file name");
mergedFields.add(commitTimeField);
mergedFields.add(commitSeqnoField);
mergedFields.add(recordKeyField);
mergedFields.add(partitionPathField);
mergedFields.add(fileNameField);
mergedFields.addAll(rowType.getFields());
return new RowType(false, mergedFields);
}
}
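
A hedged sketch of the helper's expected call pattern; the drain method and its inputs are illustrative (in this commit the calls are spread across BulkInsertWriteFunction#processElement and #endInput):

import java.io.IOException;
import java.util.List;
import org.apache.flink.table.data.RowData;
import org.apache.hudi.client.HoodieInternalWriteStatus;
import org.apache.hudi.sink.bulk.BulkInsertWriterHelper;

static List<HoodieInternalWriteStatus> drain(
    BulkInsertWriterHelper writerHelper, Iterable<RowData> rows) throws IOException {
  for (RowData row : rows) {
    // a new file handle is created whenever the partition path changes
    // or the current handle reports canWrite() == false
    writerHelper.write(row);
  }
  return writerHelper.getWriteStatuses(); // closes any open handles first
}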

View File

@@ -0,0 +1,228 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sink.bulk;
import org.apache.hudi.common.util.PartitionPathEncodeUtils;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.exception.HoodieKeyException;
import org.apache.hudi.util.RowDataProjection;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import java.util.Arrays;
import java.util.List;
/**
* Key generator for {@link RowData}.
*/
public class RowDataKeyGen {
// reference: NonpartitionedAvroKeyGenerator
private static final String EMPTY_PARTITION = "";
// reference: org.apache.hudi.keygen.KeyGenUtils
private static final String NULL_RECORDKEY_PLACEHOLDER = "__null__";
private static final String EMPTY_RECORDKEY_PLACEHOLDER = "__empty__";
private static final String DEFAULT_PARTITION_PATH = "default";
private static final String DEFAULT_PARTITION_PATH_SEPARATOR = "/";
private final String[] recordKeyFields;
private final String[] partitionPathFields;
private final RowDataProjection recordKeyProjection;
private final RowDataProjection partitionPathProjection;
private final boolean hiveStylePartitioning;
private final boolean encodePartitionPath;
// efficient code path
private boolean simpleRecordKey = false;
private RowData.FieldGetter recordKeyFieldGetter;
private boolean simplePartitionPath = false;
private RowData.FieldGetter partitionPathFieldGetter;
private boolean nonPartitioned;
private RowDataKeyGen(
String recordKeys,
String partitionFields,
RowType rowType,
boolean hiveStylePartitioning,
boolean encodePartitionPath) {
this.recordKeyFields = recordKeys.split(",");
this.partitionPathFields = partitionFields.split(",");
List<String> fieldNames = rowType.getFieldNames();
List<LogicalType> fieldTypes = rowType.getChildren();
this.hiveStylePartitioning = hiveStylePartitioning;
this.encodePartitionPath = encodePartitionPath;
if (this.recordKeyFields.length == 1) {
// efficient code path
this.simpleRecordKey = true;
int recordKeyIdx = fieldNames.indexOf(this.recordKeyFields[0]);
this.recordKeyFieldGetter = RowData.createFieldGetter(fieldTypes.get(recordKeyIdx), recordKeyIdx);
this.recordKeyProjection = null;
} else {
this.recordKeyProjection = getProjection(this.recordKeyFields, fieldNames, fieldTypes);
}
if (this.partitionPathFields.length == 1) {
// efficient code path
if (this.partitionPathFields[0].equals("")) {
this.nonPartitioned = true;
} else {
this.simplePartitionPath = true;
int partitionPathIdx = fieldNames.indexOf(this.partitionPathFields[0]);
this.partitionPathFieldGetter = RowData.createFieldGetter(fieldTypes.get(partitionPathIdx), partitionPathIdx);
}
this.partitionPathProjection = null;
} else {
this.partitionPathProjection = getProjection(this.partitionPathFields, fieldNames, fieldTypes);
}
}
public static RowDataKeyGen instance(Configuration conf, RowType rowType) {
return new RowDataKeyGen(conf.getString(FlinkOptions.RECORD_KEY_FIELD), conf.getString(FlinkOptions.PARTITION_PATH_FIELD),
rowType, conf.getBoolean(FlinkOptions.HIVE_STYLE_PARTITIONING), conf.getBoolean(FlinkOptions.URL_ENCODE_PARTITIONING));
}
public String getRecordKey(RowData rowData) {
if (this.simpleRecordKey) {
return getRecordKey(recordKeyFieldGetter.getFieldOrNull(rowData), this.recordKeyFields[0]);
} else {
Object[] keyValues = this.recordKeyProjection.projectAsValues(rowData);
return getRecordKey(keyValues, this.recordKeyFields);
}
}
public String getPartitionPath(RowData rowData) {
if (this.simplePartitionPath) {
return getPartitionPath(partitionPathFieldGetter.getFieldOrNull(rowData),
this.partitionPathFields[0], this.hiveStylePartitioning, this.encodePartitionPath);
} else if (this.nonPartitioned) {
return EMPTY_PARTITION;
} else {
Object[] partValues = this.partitionPathProjection.projectAsValues(rowData);
return getRecordPartitionPath(partValues, this.partitionPathFields, this.hiveStylePartitioning, this.encodePartitionPath);
}
}
// reference: org.apache.hudi.keygen.KeyGenUtils.getRecordKey
private static String getRecordKey(Object[] keyValues, String[] keyFields) {
boolean keyIsNullEmpty = true;
StringBuilder recordKey = new StringBuilder();
for (int i = 0; i < keyValues.length; i++) {
String recordKeyField = keyFields[i];
String recordKeyValue = StringUtils.objToString(keyValues[i]);
if (recordKeyValue == null) {
recordKey.append(recordKeyField).append(":").append(NULL_RECORDKEY_PLACEHOLDER).append(",");
} else if (recordKeyValue.isEmpty()) {
recordKey.append(recordKeyField).append(":").append(EMPTY_RECORDKEY_PLACEHOLDER).append(",");
} else {
recordKey.append(recordKeyField).append(":").append(recordKeyValue).append(",");
keyIsNullEmpty = false;
}
}
recordKey.deleteCharAt(recordKey.length() - 1);
if (keyIsNullEmpty) {
throw new HoodieKeyException("recordKey values: \"" + recordKey + "\" for fields: "
+ Arrays.toString(keyFields) + " cannot be entirely null or empty.");
}
return recordKey.toString();
}
// reference: org.apache.hudi.keygen.KeyGenUtils.getRecordPartitionPath
private static String getRecordPartitionPath(
Object[] partValues,
String[] partFields,
boolean hiveStylePartitioning,
boolean encodePartitionPath) {
StringBuilder partitionPath = new StringBuilder();
for (int i = 0; i < partFields.length; i++) {
String partField = partFields[i];
String partValue = StringUtils.objToString(partValues[i]);
if (partValue == null || partValue.isEmpty()) {
partitionPath.append(hiveStylePartitioning ? partField + "=" + DEFAULT_PARTITION_PATH
: DEFAULT_PARTITION_PATH);
} else {
if (encodePartitionPath) {
partValue = PartitionPathEncodeUtils.escapePathName(partValue);
}
partitionPath.append(hiveStylePartitioning ? partField + "=" + partValue : partValue);
}
partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR);
}
partitionPath.deleteCharAt(partitionPath.length() - 1);
return partitionPath.toString();
}
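// Example (illustrative): partFields {partition, ts} with values {"par1", null} yield
// "par1/default", or "partition=par1/ts=default" with hive style partitioning.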
// reference: org.apache.hudi.keygen.KeyGenUtils.getRecordKey
public static String getRecordKey(Object recordKeyValue, String recordKeyField) {
String recordKey = StringUtils.objToString(recordKeyValue);
if (recordKey == null || recordKey.isEmpty()) {
throw new HoodieKeyException("recordKey value: \"" + recordKey + "\" for field: \"" + recordKeyField + "\" cannot be null or empty.");
}
return recordKey;
}
// reference: org.apache.hudi.keygen.KeyGenUtils.getPartitionPath
public static String getPartitionPath(
Object partValue,
String partField,
boolean hiveStylePartitioning,
boolean encodePartitionPath) {
String partitionPath = StringUtils.objToString(partValue);
if (partitionPath == null || partitionPath.isEmpty()) {
partitionPath = DEFAULT_PARTITION_PATH;
}
if (encodePartitionPath) {
partitionPath = PartitionPathEncodeUtils.escapePathName(partitionPath);
}
if (hiveStylePartitioning) {
partitionPath = partField + "=" + partitionPath;
}
return partitionPath;
}
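// Example (illustrative): a partValue containing '/' is escaped when encodePartitionPath=true,
// e.g. "a/b" -> "a%2Fb"; with hiveStylePartitioning=true the result is prefixed as "partField=a%2Fb".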
/**
* Returns the row data projection for the given field names and table schema.
*
* @param fields The projected field names
* @param schemaFields The table schema names
* @param schemaTypes The table schema types
* @return the row data projection for the fields
*/
private static RowDataProjection getProjection(String[] fields, List<String> schemaFields, List<LogicalType> schemaTypes) {
int[] positions = getFieldPositions(fields, schemaFields);
LogicalType[] types = Arrays.stream(positions).mapToObj(schemaTypes::get).toArray(LogicalType[]::new);
return RowDataProjection.instance(types, positions);
}
/**
* Returns the field positions of the given fields {@code fields} among all the fields {@code allFields}.
*/
private static int[] getFieldPositions(String[] fields, List<String> allFields) {
return Arrays.stream(fields).mapToInt(allFields::indexOf).toArray();
}
}
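A minimal usage sketch of the key generator above (illustrative only: it assumes a schema containing "uuid" and "partition" fields, with rowType and rowData built elsewhere):

Configuration conf = new Configuration();
conf.set(FlinkOptions.RECORD_KEY_FIELD, "uuid");
conf.set(FlinkOptions.PARTITION_PATH_FIELD, "partition");
RowDataKeyGen keyGen = RowDataKeyGen.instance(conf, rowType);
String recordKey = keyGen.getRecordKey(rowData);         // e.g. "id1"
String partitionPath = keyGen.getPartitionPath(rowData); // e.g. "par1", or "partition=par1" with hive style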

View File

@@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sink.utils;
import org.apache.hudi.exception.HoodieException;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
/**
* Tool used for time waiting.
*/
public class TimeWait {
private final long timeout; // timeout in SECONDS
private final long interval; // interval in MILLISECONDS
private final String action; // action to report error message
private long waitingTime = 0L;
private TimeWait(long timeout, long interval, String action) {
this.timeout = timeout;
this.interval = interval;
this.action = action;
}
public static Builder builder() {
return new Builder();
}
/**
* Wait for an interval time.
*/
public void waitFor() {
try {
if (waitingTime > TimeUnit.SECONDS.toMillis(timeout)) {
throw new HoodieException("Timeout(" + waitingTime + "ms) while waiting for " + action);
}
TimeUnit.MILLISECONDS.sleep(interval);
waitingTime += interval;
} catch (InterruptedException e) {
throw new HoodieException("Error while waiting for " + action, e);
}
}
/**
* Builder.
*/
public static class Builder {
private long timeout;
private long interval;
private String action;
public Builder() {
this.timeout = 3600; // 1 hour by default
this.interval = 500; // 500 ms by default
}
public Builder timeout(long timeout) {
this.timeout = timeout;
return this;
}
public Builder interval(long interval) {
this.interval = interval;
return this;
}
public Builder action(String action) {
this.action = action;
return this;
}
public TimeWait build() {
Objects.requireNonNull(this.action);
return new TimeWait(this.timeout, this.interval, this.action);
}
}
}
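A minimal usage sketch for TimeWait (the timeout/interval values and the instantReady() predicate are hypothetical):

TimeWait timeWait = TimeWait.builder()
    .timeout(60)       // give up after 60 seconds
    .interval(100)     // poll every 100 ms
    .action("instant initialize")
    .build();
while (!instantReady()) { // hypothetical readiness check
  timeWait.waitFor();     // sleeps one interval, throws HoodieException on timeout
}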

View File

@@ -24,6 +24,7 @@ import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.sink.CleanFunction;
import org.apache.hudi.sink.StreamWriteOperatorFactory;
import org.apache.hudi.sink.bootstrap.BootstrapFunction;
import org.apache.hudi.sink.bulk.BulkInsertWriteOperator;
import org.apache.hudi.sink.compact.CompactFunction;
import org.apache.hudi.sink.compact.CompactionCommitEvent;
import org.apache.hudi.sink.compact.CompactionCommitSink;
@@ -45,6 +46,7 @@ import org.apache.flink.table.connector.sink.DataStreamSinkProvider;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.sink.abilities.SupportsOverwrite;
import org.apache.flink.table.connector.sink.abilities.SupportsPartitioning;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.types.RowKind;
@@ -58,29 +60,57 @@ public class HoodieTableSink implements DynamicTableSink, SupportsPartitioning,
private final Configuration conf;
private final TableSchema schema;
private boolean overwrite = false;
private boolean supportsGrouping = false;
public HoodieTableSink(Configuration conf, TableSchema schema) {
this.conf = conf;
this.schema = schema;
}
public HoodieTableSink(Configuration conf, TableSchema schema, boolean overwrite, boolean supportsGrouping) {
this.conf = conf;
this.schema = schema;
this.overwrite = overwrite;
this.supportsGrouping = supportsGrouping;
}
@Override
public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
return (DataStreamSinkProvider) dataStream -> {
// setup configuration
long ckpTimeout = dataStream.getExecutionEnvironment()
.getCheckpointConfig().getCheckpointTimeout();
conf.setLong(FlinkOptions.WRITE_COMMIT_ACK_TIMEOUT, ckpTimeout);
RowType rowType = (RowType) schema.toRowDataType().notNull().getLogicalType();
// bulk_insert mode
final String writeOperation = this.conf.get(FlinkOptions.OPERATION);
if (WriteOperationType.fromValue(writeOperation) == WriteOperationType.BULK_INSERT) {
this.conf.set(FlinkOptions.WRITE_BULK_INSERT_PARTITION_SORTED, this.supportsGrouping);
BulkInsertWriteOperator.OperatorFactory<RowData> operatorFactory = BulkInsertWriteOperator.getFactory(this.conf, rowType);
return dataStream.transform("hoodie_bulk_insert_write",
TypeInformation.of(Object.class),
operatorFactory)
// follow the parallelism of upstream operators to avoid shuffle
.setParallelism(dataStream.getParallelism())
.addSink(new CleanFunction<>(conf))
.setParallelism(1)
.name("clean_commits");
}
// stream write
int parallelism = dataStream.getExecutionConfig().getParallelism();
StreamWriteOperatorFactory<HoodieRecord> operatorFactory = new StreamWriteOperatorFactory<>(conf);
DataStream<HoodieRecord> dataStream1 = dataStream
.map(new RowDataToHoodieFunction<>(rowType, conf), TypeInformation.of(HoodieRecord.class));
// bootstrap index
// TODO: this is a very time-consuming operation; optimize it later
if (conf.getBoolean(FlinkOptions.INDEX_BOOTSTRAP_ENABLED)) {
dataStream1 = dataStream1.rebalance()
.transform(
"index_bootstrap",
TypeInformation.of(HoodieRecord.class),
@@ -89,7 +119,7 @@ public class HoodieTableSink implements DynamicTableSink, SupportsPartitioning,
.uid("uid_index_bootstrap_" + conf.getString(FlinkOptions.TABLE_NAME));
}
DataStream<Object> pipeline = dataStream1
// Key-by record key, to avoid multiple subtasks write to a bucket at the same time
.keyBy(HoodieRecord::getRecordKey)
.transform(
@@ -103,6 +133,7 @@ public class HoodieTableSink implements DynamicTableSink, SupportsPartitioning,
.transform("hoodie_stream_write", TypeInformation.of(Object.class), operatorFactory)
.uid("uid_hoodie_stream_write" + conf.getString(FlinkOptions.TABLE_NAME))
.setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS));
// compaction
if (StreamerUtil.needsAsyncCompaction(conf)) {
return pipeline.transform("compact_plan_generate",
TypeInformation.of(CompactionPlanEvent.class),
@@ -141,7 +172,7 @@ public class HoodieTableSink implements DynamicTableSink, SupportsPartitioning,
@Override
public DynamicTableSink copy() {
return new HoodieTableSink(this.conf, this.schema, this.overwrite, this.supportsGrouping);
}
@Override
@@ -167,4 +198,10 @@ public class HoodieTableSink implements DynamicTableSink, SupportsPartitioning,
public void applyOverwrite(boolean b) {
this.overwrite = b;
}
@Override
public boolean requiresPartitionGrouping(boolean supportsGrouping) {
this.supportsGrouping = supportsGrouping;
return supportsGrouping;
}
}

View File

@@ -47,6 +47,10 @@ public class RowDataProjection {
return new RowDataProjection(types, positions);
}
public static RowDataProjection instance(LogicalType[] types, int[] positions) {
return new RowDataProjection(types, positions);
}
/**
* Returns the projected row data.
*/
@@ -58,4 +62,16 @@ public class RowDataProjection {
}
return genericRowData;
}
/**
* Returns the projected values array.
*/
public Object[] projectAsValues(RowData rowData) {
Object[] values = new Object[this.fieldGetters.length];
for (int i = 0; i < this.fieldGetters.length; i++) {
final Object val = this.fieldGetters[i].getFieldOrNull(rowData);
values[i] = val;
}
return values;
}
}
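A minimal usage sketch of the new projectAsValues (types and positions are illustrative):

// Project columns 0 and 4 (say, uuid and partition) out of a RowData as raw values.
LogicalType[] types = {new VarCharType(20), new VarCharType(20)};
int[] positions = {0, 4};
RowDataProjection projection = RowDataProjection.instance(types, positions);
Object[] values = projection.projectAsValues(rowData); // nulls are preserved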

View File

@@ -0,0 +1,96 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sink.bulk;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.exception.HoodieKeyException;
import org.apache.hudi.utils.TestConfigurations;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.TimestampData;
import org.junit.jupiter.api.Test;
import static org.apache.hudi.utils.TestData.insertRow;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
* Test cases for {@link RowDataKeyGen}.
*/
public class TestRowDataKeyGen {
@Test
void testSimpleKeyAndPartition() {
Configuration conf = TestConfigurations.getDefaultConf("path1");
final RowData rowData1 = insertRow(StringData.fromString("id1"), StringData.fromString("Danny"), 23,
TimestampData.fromEpochMillis(1), StringData.fromString("par1"));
final RowDataKeyGen keyGen1 = RowDataKeyGen.instance(conf, TestConfigurations.ROW_TYPE);
assertThat(keyGen1.getRecordKey(rowData1), is("id1"));
assertThat(keyGen1.getPartitionPath(rowData1), is("par1"));
// null record key and partition path
final RowData rowData2 = insertRow(null, StringData.fromString("Danny"), 23,
TimestampData.fromEpochMillis(1), null);
assertThrows(HoodieKeyException.class, () -> keyGen1.getRecordKey(rowData2));
assertThat(keyGen1.getPartitionPath(rowData2), is("default"));
// empty record key and partition path
final RowData rowData3 = insertRow(StringData.fromString(""), StringData.fromString("Danny"), 23,
TimestampData.fromEpochMillis(1), StringData.fromString(""));
assertThrows(HoodieKeyException.class, () -> keyGen1.getRecordKey(rowData3));
assertThat(keyGen1.getPartitionPath(rowData3), is("default"));
// hive style partitioning
conf.set(FlinkOptions.HIVE_STYLE_PARTITIONING, true);
final RowDataKeyGen keyGen2 = RowDataKeyGen.instance(conf, TestConfigurations.ROW_TYPE);
assertThat(keyGen2.getPartitionPath(rowData1), is("partition=par1"));
assertThat(keyGen2.getPartitionPath(rowData2), is("partition=default"));
assertThat(keyGen2.getPartitionPath(rowData3), is("partition=default"));
}
@Test
void testComplexKeyAndPartition() {
Configuration conf = TestConfigurations.getDefaultConf("path1");
conf.set(FlinkOptions.RECORD_KEY_FIELD, "uuid,name");
conf.set(FlinkOptions.PARTITION_PATH_FIELD, "partition,ts");
RowData rowData1 = insertRow(StringData.fromString("id1"), StringData.fromString("Danny"), 23,
TimestampData.fromEpochMillis(1), StringData.fromString("par1"));
RowDataKeyGen keyGen1 = RowDataKeyGen.instance(conf, TestConfigurations.ROW_TYPE);
assertThat(keyGen1.getRecordKey(rowData1), is("uuid:id1,name:Danny"));
assertThat(keyGen1.getPartitionPath(rowData1), is("par1/1970-01-01T00:00:00.001"));
// null record key and partition path
final RowData rowData2 = insertRow(null, null, 23, null, null);
assertThrows(HoodieKeyException.class, () -> keyGen1.getRecordKey(rowData2));
assertThat(keyGen1.getPartitionPath(rowData2), is("default/default"));
// empty record key and partition path
final RowData rowData3 = insertRow(StringData.fromString(""), StringData.fromString(""), 23,
TimestampData.fromEpochMillis(1), StringData.fromString(""));
assertThrows(HoodieKeyException.class, () -> keyGen1.getRecordKey(rowData3));
assertThat(keyGen1.getPartitionPath(rowData3), is("default/1970-01-01T00:00:00.001"));
// hive style partitioning
conf.set(FlinkOptions.HIVE_STYLE_PARTITIONING, true);
final RowDataKeyGen keyGen2 = RowDataKeyGen.instance(conf, TestConfigurations.ROW_TYPE);
assertThat(keyGen2.getPartitionPath(rowData1), is("partition=par1/ts=1970-01-01T00:00:00.001"));
assertThat(keyGen2.getPartitionPath(rowData2), is("partition=default/ts=default"));
assertThat(keyGen2.getPartitionPath(rowData3), is("partition=default/ts=1970-01-01T00:00:00.001"));
}
}

View File

@@ -25,6 +25,7 @@ import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.util.StreamerUtil;
import org.apache.hudi.utils.TestConfigurations;
import org.apache.hudi.utils.TestData;
import org.apache.hudi.utils.TestSQL;
import org.apache.hudi.utils.TestUtils;
import org.apache.hudi.utils.factory.CollectSinkTableFactory;
@@ -48,6 +49,7 @@ import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.EnumSource;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;
import java.io.File;
import java.util.Collection;
@@ -66,7 +68,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
/**
* IT cases for Hoodie table source and sink.
*
* <p>
* Note: should add more SQL cases when batch write is supported.
*/
public class HoodieDataSourceITCase extends AbstractTestBase {
@@ -267,17 +269,8 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
}
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options);
tableEnv.executeSql(hoodieTableDDL);
String insertInto = "insert into t1 values\n"
+ "('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1'),\n"
+ "('id2','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par1'),\n"
+ "('id3','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par2'),\n"
+ "('id4','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par2'),\n"
+ "('id5','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par3'),\n"
+ "('id6','Emma',20,TIMESTAMP '1970-01-01 00:00:06','par3'),\n"
+ "('id7','Bob',44,TIMESTAMP '1970-01-01 00:00:07','par4'),\n"
+ "('id8','Han',56,TIMESTAMP '1970-01-01 00:00:08','par4')";
execInsertSql(tableEnv, insertInto);
execInsertSql(tableEnv, TestSQL.INSERT_T1);
List<Row> result1 = CollectionUtil.iterableToList(
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
@@ -296,40 +289,40 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
void testWriteAndReadParMiddle(ExecMode execMode) throws Exception {
boolean streaming = execMode == ExecMode.STREAM;
String hoodieTableDDL = "create table t1(\n"
+ " uuid varchar(20),\n"
+ " name varchar(10),\n"
+ " age int,\n"
+ " `partition` varchar(20),\n" // test streaming read with partition field in the middle
+ " ts timestamp(3),\n"
+ " PRIMARY KEY(uuid) NOT ENFORCED\n"
+ ")\n"
+ "PARTITIONED BY (`partition`)\n"
+ "with (\n"
+ " 'connector' = 'hudi',\n"
+ " 'path' = '" + tempFile.getAbsolutePath() + "',\n"
+ " 'read.streaming.enabled' = '" + streaming + "'\n"
+ ")";
+ " uuid varchar(20),\n"
+ " name varchar(10),\n"
+ " age int,\n"
+ " `partition` varchar(20),\n" // test streaming read with partition field in the middle
+ " ts timestamp(3),\n"
+ " PRIMARY KEY(uuid) NOT ENFORCED\n"
+ ")\n"
+ "PARTITIONED BY (`partition`)\n"
+ "with (\n"
+ " 'connector' = 'hudi',\n"
+ " 'path' = '" + tempFile.getAbsolutePath() + "',\n"
+ " 'read.streaming.enabled' = '" + streaming + "'\n"
+ ")";
streamTableEnv.executeSql(hoodieTableDDL);
String insertInto = "insert into t1 values\n"
+ "('id1','Danny',23,'par1',TIMESTAMP '1970-01-01 00:00:01'),\n"
+ "('id2','Stephen',33,'par1',TIMESTAMP '1970-01-01 00:00:02'),\n"
+ "('id3','Julian',53,'par2',TIMESTAMP '1970-01-01 00:00:03'),\n"
+ "('id4','Fabian',31,'par2',TIMESTAMP '1970-01-01 00:00:04'),\n"
+ "('id5','Sophia',18,'par3',TIMESTAMP '1970-01-01 00:00:05'),\n"
+ "('id6','Emma',20,'par3',TIMESTAMP '1970-01-01 00:00:06'),\n"
+ "('id7','Bob',44,'par4',TIMESTAMP '1970-01-01 00:00:07'),\n"
+ "('id8','Han',56,'par4',TIMESTAMP '1970-01-01 00:00:08')";
+ "('id1','Danny',23,'par1',TIMESTAMP '1970-01-01 00:00:01'),\n"
+ "('id2','Stephen',33,'par1',TIMESTAMP '1970-01-01 00:00:02'),\n"
+ "('id3','Julian',53,'par2',TIMESTAMP '1970-01-01 00:00:03'),\n"
+ "('id4','Fabian',31,'par2',TIMESTAMP '1970-01-01 00:00:04'),\n"
+ "('id5','Sophia',18,'par3',TIMESTAMP '1970-01-01 00:00:05'),\n"
+ "('id6','Emma',20,'par3',TIMESTAMP '1970-01-01 00:00:06'),\n"
+ "('id7','Bob',44,'par4',TIMESTAMP '1970-01-01 00:00:07'),\n"
+ "('id8','Han',56,'par4',TIMESTAMP '1970-01-01 00:00:08')";
execInsertSql(streamTableEnv, insertInto);
final String expected = "["
+ "id1,Danny,23,par1,1970-01-01T00:00:01, "
+ "id2,Stephen,33,par1,1970-01-01T00:00:02, "
+ "id3,Julian,53,par2,1970-01-01T00:00:03, "
+ "id4,Fabian,31,par2,1970-01-01T00:00:04, "
+ "id5,Sophia,18,par3,1970-01-01T00:00:05, "
+ "id6,Emma,20,par3,1970-01-01T00:00:06, "
+ "id7,Bob,44,par4,1970-01-01T00:00:07, "
+ "id8,Han,56,par4,1970-01-01T00:00:08]";
+ "id1,Danny,23,par1,1970-01-01T00:00:01, "
+ "id2,Stephen,33,par1,1970-01-01T00:00:02, "
+ "id3,Julian,53,par2,1970-01-01T00:00:03, "
+ "id4,Fabian,31,par2,1970-01-01T00:00:04, "
+ "id5,Sophia,18,par3,1970-01-01T00:00:05, "
+ "id6,Emma,20,par3,1970-01-01T00:00:06, "
+ "id7,Bob,44,par4,1970-01-01T00:00:07, "
+ "id8,Han,56,par4,1970-01-01T00:00:08]";
List<Row> result = execSelectSql(streamTableEnv, "select * from t1", execMode);
@@ -350,17 +343,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options);
tableEnv.executeSql(hoodieTableDDL);
final String insertInto1 = "insert into t1 values\n"
+ "('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1'),\n"
+ "('id2','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par1'),\n"
+ "('id3','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par2'),\n"
+ "('id4','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par2'),\n"
+ "('id5','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par3'),\n"
+ "('id6','Emma',20,TIMESTAMP '1970-01-01 00:00:06','par3'),\n"
+ "('id7','Bob',44,TIMESTAMP '1970-01-01 00:00:07','par4'),\n"
+ "('id8','Han',56,TIMESTAMP '1970-01-01 00:00:08','par4')";
execInsertSql(tableEnv, insertInto1);
execInsertSql(tableEnv, TestSQL.INSERT_T1);
// overwrite partition 'par1' and increase in age by 1
final String insertInto2 = "insert overwrite t1 partition(`partition`='par1') values\n"
@@ -519,7 +502,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
// execute query and assert throws exception
assertThrows(HoodieException.class, () -> execSelectSql(streamTableEnv, "select * from t1", 10),
"No successful commits under path " + tempFile.getAbsolutePath());
"No successful commits under path " + tempFile.getAbsolutePath());
}
@@ -575,6 +558,80 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
assertRowsEquals(result, expected);
}
@ParameterizedTest
@ValueSource(booleans = {true, false})
void testBulkInsert(boolean hiveStylePartitioning) {
TableEnvironment tableEnv = batchTableEnv;
// csv source
String csvSourceDDL = TestConfigurations.getCsvSourceDDL("csv_source", "test_source_5.data");
tableEnv.executeSql(csvSourceDDL);
Map<String, String> options = new HashMap<>();
options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
options.put(FlinkOptions.OPERATION.key(), "bulk_insert");
options.put(FlinkOptions.SINK_SHUFFLE_BY_PARTITION.key(), "true");
if (hiveStylePartitioning) {
options.put(FlinkOptions.HIVE_STYLE_PARTITIONING.key(), "true");
}
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("hoodie_sink", options);
tableEnv.executeSql(hoodieTableDDL);
String insertInto = "insert into hoodie_sink select * from csv_source";
execInsertSql(tableEnv, insertInto);
List<Row> result1 = CollectionUtil.iterableToList(
() -> tableEnv.sqlQuery("select * from hoodie_sink").execute().collect());
assertRowsEquals(result1, TestData.DATA_SET_SOURCE_INSERT);
// apply filters
List<Row> result2 = CollectionUtil.iterableToList(
() -> tableEnv.sqlQuery("select * from hoodie_sink where uuid > 'id5'").execute().collect());
assertRowsEquals(result2, "["
+ "id6,Emma,20,1970-01-01T00:00:06,par3, "
+ "id7,Bob,44,1970-01-01T00:00:07,par4, "
+ "id8,Han,56,1970-01-01T00:00:08,par4]");
}
@Test
void testBulkInsertNonPartitionedTable() {
TableEnvironment tableEnv = batchTableEnv;
String hoodieTableDDL = "create table t1(\n"
+ " uuid varchar(20),\n"
+ " name varchar(10),\n"
+ " age int,\n"
+ " ts timestamp(3),\n"
+ " `partition` varchar(20),\n"
+ " PRIMARY KEY(uuid) NOT ENFORCED\n"
+ ")\n"
+ "with (\n"
+ " 'connector' = 'hudi',\n"
+ " 'path' = '" + tempFile.getAbsolutePath() + "',\n"
+ " 'write.operation' = 'bulk_insert'\n"
+ ")";
tableEnv.executeSql(hoodieTableDDL);
final String insertInto1 = "insert into t1 values\n"
+ "('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1')";
execInsertSql(tableEnv, insertInto1);
final String insertInto2 = "insert into t1 values\n"
+ "('id1','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par2'),\n"
+ "('id1','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par3'),\n"
+ "('id1','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par4'),\n"
+ "('id1','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par5')";
execInsertSql(tableEnv, insertInto2);
List<Row> result = CollectionUtil.iterableToList(
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
assertRowsEquals(result, "["
+ "id1,Danny,23,1970-01-01T00:00:01,par1, "
+ "id1,Stephen,33,1970-01-01T00:00:02,par2, "
+ "id1,Julian,53,1970-01-01T00:00:03,par3, "
+ "id1,Fabian,31,1970-01-01T00:00:04,par4, "
+ "id1,Sophia,18,1970-01-01T00:00:05,par5]", 3);
}
// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------
@@ -606,7 +663,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
}
private List<Row> execSelectSql(TableEnvironment tEnv, String select, ExecMode execMode)
throws TableNotExistException, InterruptedException {
final String[] splits = select.split(" ");
final String tableName = splits[splits.length - 1];
switch (execMode) {
@@ -621,12 +678,12 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
}
private List<Row> execSelectSql(TableEnvironment tEnv, String select, long timeout)
throws InterruptedException, TableNotExistException {
return execSelectSql(tEnv, select, timeout, null);
}
private List<Row> execSelectSql(TableEnvironment tEnv, String select, long timeout, String sourceTable)
throws InterruptedException, TableNotExistException {
final String sinkDDL;
if (sourceTable != null) {
// use the source table schema as the sink schema if the source table was specified.

View File

@@ -137,6 +137,22 @@ public class TestConfigurations {
return builder.toString();
}
public static String getCsvSourceDDL(String tableName, String fileName) {
String sourcePath = Objects.requireNonNull(Thread.currentThread()
.getContextClassLoader().getResource(fileName)).toString();
return "create table " + tableName + "(\n"
+ " uuid varchar(20),\n"
+ " name varchar(10),\n"
+ " age int,\n"
+ " ts timestamp(3),\n"
+ " `partition` varchar(20)\n"
+ ") with (\n"
+ " 'connector' = 'filesystem',\n"
+ " 'path' = '" + sourcePath + "',\n"
+ " 'format' = 'csv'\n"
+ ")";
}
public static final RowDataSerializer SERIALIZER = new RowDataSerializer(ROW_TYPE);
public static Configuration getDefaultConf(String tablePath) {

View File

@@ -515,7 +515,7 @@ public class TestData {
return Strings.join(fields, ",");
}
public static BinaryRowData insertRow(Object... fields) {
LogicalType[] types = TestConfigurations.ROW_TYPE.getFields().stream().map(RowType.RowField::getType)
.toArray(LogicalType[]::new);
assertEquals(

View File

@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.utils;
/**
* Test sql statements.
*/
public class TestSQL {
private TestSQL() {}
public static final String INSERT_T1 = "insert into t1 values\n"
+ "('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1'),\n"
+ "('id2','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par1'),\n"
+ "('id3','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par2'),\n"
+ "('id4','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par2'),\n"
+ "('id5','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par3'),\n"
+ "('id6','Emma',20,TIMESTAMP '1970-01-01 00:00:06','par3'),\n"
+ "('id7','Bob',44,TIMESTAMP '1970-01-01 00:00:07','par4'),\n"
+ "('id8','Han',56,TIMESTAMP '1970-01-01 00:00:08','par4')";
}

View File

@@ -0,0 +1,8 @@
id1,Danny,23,1970-01-01 00:00:01,par1
id2,Stephen,33,1970-01-01 00:00:02,par1
id3,Julian,53,1970-01-01 00:00:03,par2
id4,Fabian,31,1970-01-01 00:00:04,par2
id5,Sophia,18,1970-01-01 00:00:05,par3
id6,Emma,20,1970-01-01 00:00:06,par3
id7,Bob,44,1970-01-01 00:00:07,par4
id8,Han,56,1970-01-01 00:00:08,par4