[MINOR] Cosmetic changes for flink (#3701)
This commit is contained in:
@@ -67,7 +67,7 @@ import java.util.stream.Collectors;
|
|||||||
* <p><h2>The Semantics</h2>
|
* <p><h2>The Semantics</h2>
|
||||||
*
|
*
|
||||||
* <p>The task implements exactly-once semantics by buffering the data between checkpoints. The operator coordinator
|
* <p>The task implements exactly-once semantics by buffering the data between checkpoints. The operator coordinator
|
||||||
* starts a new instant on the time line when a checkpoint triggers, the coordinator checkpoints always
|
* starts a new instant on the timeline when a checkpoint triggers, the coordinator checkpoints always
|
||||||
* start before its operator, so when this function starts a checkpoint, a REQUESTED instant already exists.
|
* start before its operator, so when this function starts a checkpoint, a REQUESTED instant already exists.
|
||||||
*
|
*
|
||||||
* <p>The function process thread blocks data buffering after the checkpoint thread finishes flushing the existing data buffer until
|
* <p>The function process thread blocks data buffering after the checkpoint thread finishes flushing the existing data buffer until
|
||||||
|
|||||||
@@ -28,11 +28,8 @@ import java.util.List;
|
|||||||
/**
|
/**
|
||||||
* WriteProfile that always return empty small files.
|
* WriteProfile that always return empty small files.
|
||||||
*
|
*
|
||||||
* <p>This write profile is used for cases:
|
* <p>This write profile is used for INSERT OVERWRITE and INSERT OVERWRITE TABLE operations,
|
||||||
* i). INSERT OVERWRITE and INSERT OVERWRITE TABLE operations,
|
* the existing small files are ignored because of the 'OVERWRITE' semantics.
|
||||||
* the existing small files are ignored because of the 'OVERWRITE' semantics;
|
|
||||||
* ii). INSERT operation when data file merge is disabled.
|
|
||||||
*
|
|
||||||
*
|
*
|
||||||
* <p>Note: assumes the index can always index log files for Flink write.
|
* <p>Note: assumes the index can always index log files for Flink write.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -41,8 +41,9 @@ import org.apache.flink.table.types.logical.RowType;
|
|||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An Utility which can incrementally consume data from Kafka and apply it to the target table.
|
* A utility which can incrementally consume data from Kafka and apply it to the target table.
|
||||||
* currently, it only supports COW table and insert, upsert operation.
|
* It has the similar functionality with SQL data source except that the source is bind to Kafka
|
||||||
|
* and the format is bind to JSON.
|
||||||
*/
|
*/
|
||||||
public class HoodieFlinkStreamer {
|
public class HoodieFlinkStreamer {
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|||||||
@@ -135,7 +135,7 @@ public class HoodieTableFactory implements DynamicTableSourceFactory, DynamicTab
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Setup the config options based on the table definition, for e.g the table name, primary key.
|
* Sets up the config options based on the table definition, for e.g the table name, primary key.
|
||||||
*
|
*
|
||||||
* @param conf The configuration to setup
|
* @param conf The configuration to setup
|
||||||
* @param tableName The table name
|
* @param tableName The table name
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ import org.apache.hudi.table.format.mor.MergeOnReadInputSplit;
|
|||||||
import org.apache.hudi.table.format.mor.MergeOnReadTableState;
|
import org.apache.hudi.table.format.mor.MergeOnReadTableState;
|
||||||
import org.apache.hudi.util.AvroSchemaConverter;
|
import org.apache.hudi.util.AvroSchemaConverter;
|
||||||
import org.apache.hudi.util.ChangelogModes;
|
import org.apache.hudi.util.ChangelogModes;
|
||||||
|
import org.apache.hudi.util.InputFormats;
|
||||||
import org.apache.hudi.util.StreamerUtil;
|
import org.apache.hudi.util.StreamerUtil;
|
||||||
|
|
||||||
import org.apache.avro.Schema;
|
import org.apache.avro.Schema;
|
||||||
@@ -48,7 +49,6 @@ import org.apache.flink.api.common.io.FileInputFormat;
|
|||||||
import org.apache.flink.api.common.io.FilePathFilter;
|
import org.apache.flink.api.common.io.FilePathFilter;
|
||||||
import org.apache.flink.api.common.io.InputFormat;
|
import org.apache.flink.api.common.io.InputFormat;
|
||||||
import org.apache.flink.api.common.typeinfo.TypeInformation;
|
import org.apache.flink.api.common.typeinfo.TypeInformation;
|
||||||
import org.apache.flink.api.java.io.CollectionInputFormat;
|
|
||||||
import org.apache.flink.configuration.Configuration;
|
import org.apache.flink.configuration.Configuration;
|
||||||
import org.apache.flink.streaming.api.datastream.DataStream;
|
import org.apache.flink.streaming.api.datastream.DataStream;
|
||||||
import org.apache.flink.streaming.api.datastream.DataStreamSource;
|
import org.apache.flink.streaming.api.datastream.DataStreamSource;
|
||||||
@@ -108,9 +108,6 @@ public class HoodieTableSource implements
|
|||||||
|
|
||||||
private static final int NO_LIMIT_CONSTANT = -1;
|
private static final int NO_LIMIT_CONSTANT = -1;
|
||||||
|
|
||||||
private static final InputFormat<RowData, ?> EMPTY_INPUT_FORMAT =
|
|
||||||
new CollectionInputFormat<>(Collections.emptyList(), null);
|
|
||||||
|
|
||||||
private final transient org.apache.hadoop.conf.Configuration hadoopConf;
|
private final transient org.apache.hadoop.conf.Configuration hadoopConf;
|
||||||
private final transient HoodieTableMetaClient metaClient;
|
private final transient HoodieTableMetaClient metaClient;
|
||||||
private final long maxCompactionMemoryInBytes;
|
private final long maxCompactionMemoryInBytes;
|
||||||
@@ -340,7 +337,7 @@ public class HoodieTableSource implements
|
|||||||
if (inputSplits.size() == 0) {
|
if (inputSplits.size() == 0) {
|
||||||
// When there is no input splits, just return an empty source.
|
// When there is no input splits, just return an empty source.
|
||||||
LOG.warn("No input splits generate for MERGE_ON_READ input format, returns empty collection instead");
|
LOG.warn("No input splits generate for MERGE_ON_READ input format, returns empty collection instead");
|
||||||
return EMPTY_INPUT_FORMAT;
|
return InputFormats.EMPTY_INPUT_FORMAT;
|
||||||
}
|
}
|
||||||
return mergeOnReadInputFormat(rowType, requiredRowType, tableAvroSchema,
|
return mergeOnReadInputFormat(rowType, requiredRowType, tableAvroSchema,
|
||||||
rowDataType, inputSplits, false);
|
rowDataType, inputSplits, false);
|
||||||
@@ -360,7 +357,7 @@ public class HoodieTableSource implements
|
|||||||
if (result.isEmpty()) {
|
if (result.isEmpty()) {
|
||||||
// When there is no input splits, just return an empty source.
|
// When there is no input splits, just return an empty source.
|
||||||
LOG.warn("No input splits generate for incremental read, returns empty collection instead");
|
LOG.warn("No input splits generate for incremental read, returns empty collection instead");
|
||||||
return new CollectionInputFormat<>(Collections.emptyList(), null);
|
return InputFormats.EMPTY_INPUT_FORMAT;
|
||||||
}
|
}
|
||||||
return mergeOnReadInputFormat(rowType, requiredRowType, tableAvroSchema,
|
return mergeOnReadInputFormat(rowType, requiredRowType, tableAvroSchema,
|
||||||
rowDataType, result.getInputSplits(), false);
|
rowDataType, result.getInputSplits(), false);
|
||||||
@@ -419,7 +416,7 @@ public class HoodieTableSource implements
|
|||||||
private InputFormat<RowData, ?> baseFileOnlyInputFormat() {
|
private InputFormat<RowData, ?> baseFileOnlyInputFormat() {
|
||||||
final Path[] paths = getReadPaths();
|
final Path[] paths = getReadPaths();
|
||||||
if (paths.length == 0) {
|
if (paths.length == 0) {
|
||||||
return EMPTY_INPUT_FORMAT;
|
return InputFormats.EMPTY_INPUT_FORMAT;
|
||||||
}
|
}
|
||||||
FileInputFormat<RowData> format = new CopyOnWriteInputFormat(
|
FileInputFormat<RowData> format = new CopyOnWriteInputFormat(
|
||||||
FilePathUtils.toFlinkPaths(paths),
|
FilePathUtils.toFlinkPaths(paths),
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ public class MergeOnReadInputFormat
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Flag saying whether to emit the deletes. In streaming read mode, downstream
|
* Flag saying whether to emit the deletes. In streaming read mode, downstream
|
||||||
* operators need the delete messages to retract the legacy accumulator.
|
* operators need the DELETE messages to retract the legacy accumulator.
|
||||||
*/
|
*/
|
||||||
private boolean emitDelete;
|
private boolean emitDelete;
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.util;
|
||||||
|
|
||||||
|
import org.apache.flink.api.common.io.InputFormat;
|
||||||
|
import org.apache.flink.api.java.io.CollectionInputFormat;
|
||||||
|
import org.apache.flink.table.data.RowData;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utilities for all kinds of {@link org.apache.flink.api.common.io.InputFormat}s.
|
||||||
|
*/
|
||||||
|
public class InputFormats {
|
||||||
|
public static final InputFormat<RowData, ?> EMPTY_INPUT_FORMAT =
|
||||||
|
new CollectionInputFormat<>(Collections.emptyList(), null);
|
||||||
|
}
|
||||||
@@ -250,7 +250,7 @@ public class StreamerUtil {
|
|||||||
basePath, conf.getString(FlinkOptions.TABLE_NAME));
|
basePath, conf.getString(FlinkOptions.TABLE_NAME));
|
||||||
}
|
}
|
||||||
// Do not close the filesystem in order to use the CACHE,
|
// Do not close the filesystem in order to use the CACHE,
|
||||||
// some of the filesystems release the handles in #close method.
|
// some filesystems release the handles in #close method.
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -359,7 +359,7 @@ public class StreamerUtil {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the median instant time between the given two instant time.
|
* Returns the median instant time between the given two instant time.
|
||||||
*/
|
*/
|
||||||
public static String medianInstantTime(String highVal, String lowVal) {
|
public static String medianInstantTime(String highVal, String lowVal) {
|
||||||
try {
|
try {
|
||||||
@@ -399,6 +399,10 @@ public class StreamerUtil {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether the give file is in valid hoodie format.
|
||||||
|
* For example, filtering out the empty or corrupt files.
|
||||||
|
*/
|
||||||
public static boolean isValidFile(FileStatus fileStatus) {
|
public static boolean isValidFile(FileStatus fileStatus) {
|
||||||
final String extension = FSUtils.getFileExtension(fileStatus.getPath().toString());
|
final String extension = FSUtils.getFileExtension(fileStatus.getPath().toString());
|
||||||
if (PARQUET.getFileExtension().equals(extension)) {
|
if (PARQUET.getFileExtension().equals(extension)) {
|
||||||
@@ -416,11 +420,19 @@ public class StreamerUtil {
|
|||||||
return fileStatus.getLen() > 0;
|
return fileStatus.getLen() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether insert deduplication is allowed with given configuration {@code conf}.
|
||||||
|
*/
|
||||||
public static boolean allowDuplicateInserts(Configuration conf) {
|
public static boolean allowDuplicateInserts(Configuration conf) {
|
||||||
WriteOperationType operationType = WriteOperationType.fromValue(conf.getString(FlinkOptions.OPERATION));
|
WriteOperationType operationType = WriteOperationType.fromValue(conf.getString(FlinkOptions.OPERATION));
|
||||||
return operationType == WriteOperationType.INSERT && !conf.getBoolean(FlinkOptions.INSERT_DEDUP);
|
return operationType == WriteOperationType.INSERT && !conf.getBoolean(FlinkOptions.INSERT_DEDUP);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether there are successful commits on the timeline.
|
||||||
|
* @param metaClient The meta client
|
||||||
|
* @return true if there is any successful commit
|
||||||
|
*/
|
||||||
public static boolean haveSuccessfulCommits(HoodieTableMetaClient metaClient) {
|
public static boolean haveSuccessfulCommits(HoodieTableMetaClient metaClient) {
|
||||||
return !metaClient.getCommitsTimeline().filterCompletedInstants().empty();
|
return !metaClient.getCommitsTimeline().filterCompletedInstants().empty();
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user