1
0

[HUDI-1877] Support records staying in same fileId after clustering (#3833)

* [HUDI-1877] Support records staying in same fileId after clustering

Add plan strategy

* Ensure the same file group ID is retained and refactor based on review comments
This commit is contained in:
Sagar Sumit
2021-11-10 09:47:50 +05:30
committed by GitHub
parent dfe3b84715
commit bb6a19e7d7
14 changed files with 280 additions and 81 deletions

View File

@@ -23,7 +23,9 @@ import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
public class CreateHandleFactory<T extends HoodieRecordPayload, I, K, O> extends WriteHandleFactory<T, I, K, O> {
import java.io.Serializable;
public class CreateHandleFactory<T extends HoodieRecordPayload, I, K, O> extends WriteHandleFactory<T, I, K, O> implements Serializable {
private boolean preserveMetadata = false;

View File

@@ -24,6 +24,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.table.HoodieTable;
import java.io.Serializable;
import java.util.concurrent.atomic.AtomicBoolean;
/**
@@ -31,11 +32,11 @@ import java.util.concurrent.atomic.AtomicBoolean;
* <p>
* Please use this with caution. This can end up creating very large files if not used correctly.
*/
public class SingleFileHandleCreateFactory<T extends HoodieRecordPayload, I, K, O> extends WriteHandleFactory<T, I, K, O> {
public class SingleFileHandleCreateFactory<T extends HoodieRecordPayload, I, K, O> extends CreateHandleFactory<T, I, K, O> implements Serializable {
private AtomicBoolean isHandleCreated = new AtomicBoolean(false);
private String fileId;
private boolean preserveHoodieMetadata;
private final AtomicBoolean isHandleCreated = new AtomicBoolean(false);
private final String fileId;
private final boolean preserveHoodieMetadata;
public SingleFileHandleCreateFactory(String fileId, boolean preserveHoodieMetadata) {
super();

View File

@@ -23,7 +23,9 @@ import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
public abstract class WriteHandleFactory<T extends HoodieRecordPayload, I, K, O> {
import java.io.Serializable;
public abstract class WriteHandleFactory<T extends HoodieRecordPayload, I, K, O> implements Serializable {
private int numFilesWritten = 0;
public abstract HoodieWriteHandle<T, I, K, O> create(HoodieWriteConfig config, String commitTime, HoodieTable<T, I, K, O> hoodieTable,

View File

@@ -21,6 +21,7 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.WriteHandleFactory;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
@@ -44,5 +45,5 @@ public abstract class AbstractBulkInsertHelper<T extends HoodieRecordPayload, I,
Option<BulkInsertPartitioner<T>> userDefinedBulkInsertPartitioner,
boolean addMetadataFields,
int parallelism,
boolean preserveMetadata);
WriteHandleFactory writeHandleFactory);
}