[HUDI-2161] Adding support to disable meta columns with bulk insert operation (#3247)
This commit is contained in:
committed by
GitHub
parent
2099bf41db
commit
d5026e9a24
@@ -141,6 +141,12 @@ public class HoodieTableConfig extends HoodieConfig implements Serializable {
|
||||
.noDefaultValue()
|
||||
.withDocumentation("Base path of the dataset that needs to be bootstrapped as a Hudi table");
|
||||
|
||||
public static final ConfigProperty<String> HOODIE_POPULATE_META_FIELDS = ConfigProperty
|
||||
.key("hoodie.populate.meta.fields")
|
||||
.defaultValue("true")
|
||||
.withDocumentation("When enabled, populates all meta fields. When disabled, no meta fields are populated "
|
||||
+ "and incremental queries will not be functional. This is only meant to be used for append only/immutable data for batch processing");
|
||||
|
||||
public static final String NO_OP_BOOTSTRAP_INDEX_CLASS = NoOpBootstrapIndex.class.getName();
|
||||
|
||||
public HoodieTableConfig(FileSystem fs, String metaPath, String payloadClassName) {
|
||||
@@ -313,6 +319,13 @@ public class HoodieTableConfig extends HoodieConfig implements Serializable {
|
||||
return getStringOrDefault(HOODIE_ARCHIVELOG_FOLDER_PROP);
|
||||
}
|
||||
|
||||
/**
|
||||
* @returns true is meta fields need to be populated. else returns false.
|
||||
*/
|
||||
public boolean populateMetaFields() {
|
||||
return Boolean.parseBoolean(getStringOrDefault(HOODIE_POPULATE_META_FIELDS));
|
||||
}
|
||||
|
||||
public Map<String, String> propsMap() {
|
||||
return props.entrySet().stream()
|
||||
.collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue())));
|
||||
|
||||
@@ -27,6 +27,7 @@ import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
|
||||
import org.apache.hudi.common.fs.NoOpConsistencyGuard;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.WriteOperationType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
@@ -312,6 +313,25 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
return archivedTimeline;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate table properties.
|
||||
* @param properties Properties from writeConfig.
|
||||
* @param operationType operation type to be executed.
|
||||
*/
|
||||
public void validateTableProperties(Properties properties, WriteOperationType operationType) {
|
||||
// disabling meta fields are allowed only for bulk_insert operation
|
||||
if (!Boolean.parseBoolean((String) properties.getOrDefault(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key(), HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.defaultValue()))
|
||||
&& operationType != WriteOperationType.BULK_INSERT) {
|
||||
throw new HoodieException(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key() + " can only be disabled for " + WriteOperationType.BULK_INSERT
|
||||
+ " operation");
|
||||
}
|
||||
// once meta fields are disabled, it cant be re-enabled for a given table.
|
||||
if (!getTableConfig().populateMetaFields()
|
||||
&& Boolean.parseBoolean((String) properties.getOrDefault(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key(), HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.defaultValue()))) {
|
||||
throw new HoodieException(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key() + " already disabled for the table. Can't be re-enabled back");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to initialize a given path as a hoodie table with configs passed in as Properties.
|
||||
*
|
||||
@@ -602,6 +622,7 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
private String partitionColumns;
|
||||
private String bootstrapIndexClass;
|
||||
private String bootstrapBasePath;
|
||||
private Boolean populateMetaFields;
|
||||
|
||||
private PropertyBuilder() {
|
||||
|
||||
@@ -675,6 +696,11 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
return this;
|
||||
}
|
||||
|
||||
public PropertyBuilder setPopulateMetaFields(boolean populateMetaFields) {
|
||||
this.populateMetaFields = populateMetaFields;
|
||||
return this;
|
||||
}
|
||||
|
||||
public PropertyBuilder fromMetaClient(HoodieTableMetaClient metaClient) {
|
||||
return setTableType(metaClient.getTableType())
|
||||
.setTableName(metaClient.getTableConfig().getTableName())
|
||||
@@ -725,6 +751,9 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
if (hoodieConfig.contains(HoodieTableConfig.HOODIE_TABLE_CREATE_SCHEMA)) {
|
||||
setTableCreateSchema(hoodieConfig.getString(HoodieTableConfig.HOODIE_TABLE_CREATE_SCHEMA));
|
||||
}
|
||||
if (hoodieConfig.contains(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS)) {
|
||||
setPopulateMetaFields(hoodieConfig.getBoolean(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -778,6 +807,9 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
if (null != recordKeyFields) {
|
||||
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS, recordKeyFields);
|
||||
}
|
||||
if (null != populateMetaFields) {
|
||||
tableConfig.setValue(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS, Boolean.toString(populateMetaFields));
|
||||
}
|
||||
return tableConfig.getProps();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user