[HUDI-2483] Infer changelog mode for flink compactor (#3706)
This commit is contained in:
@@ -66,6 +66,9 @@ public class HoodieFlinkCompactor {
|
||||
// set table schema
|
||||
CompactionUtil.setAvroSchema(conf, metaClient);
|
||||
|
||||
// infer changelog mode
|
||||
CompactionUtil.inferChangelogMode(conf, metaClient);
|
||||
|
||||
HoodieFlinkWriteClient writeClient = StreamerUtil.createWriteClient(conf, null);
|
||||
HoodieFlinkTable<?> table = writeClient.getHoodieTable();
|
||||
|
||||
|
||||
@@ -50,6 +50,8 @@ import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import scala.Serializable;
|
||||
|
||||
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
|
||||
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS;
|
||||
import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS;
|
||||
@@ -66,7 +68,10 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_O
|
||||
* <li>use the file paths from #step 3 as the back-up of the filesystem view.</li>
|
||||
* </ol>
|
||||
*/
|
||||
public class IncrementalInputSplits {
|
||||
public class IncrementalInputSplits implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(IncrementalInputSplits.class);
|
||||
private final Configuration conf;
|
||||
private final Path path;
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
package org.apache.hudi.util;
|
||||
|
||||
import org.apache.hudi.client.HoodieFlinkWriteClient;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.TableSchemaResolver;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
@@ -76,6 +77,21 @@ public class CompactionUtil {
|
||||
conf.setString(FlinkOptions.SOURCE_AVRO_SCHEMA, tableAvroSchema.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Infers the changelog mode based on the data file schema(including metadata fields).
|
||||
*
|
||||
* <p>We can improve the code if the changelog mode is set up as table config.
|
||||
*
|
||||
* @param conf The configuration
|
||||
*/
|
||||
public static void inferChangelogMode(Configuration conf, HoodieTableMetaClient metaClient) throws Exception {
|
||||
TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(metaClient);
|
||||
Schema tableAvroSchema = tableSchemaResolver.getTableAvroSchemaFromDataFile();
|
||||
if (tableAvroSchema.getField(HoodieRecord.OPERATION_METADATA_FIELD) != null) {
|
||||
conf.setBoolean(FlinkOptions.CHANGELOG_ENABLED, true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans the metadata file for the given instant {@code instant}.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user