[HUDI-1771] Propagate CDC format for hoodie (#3285)
This commit is contained in:
@@ -71,7 +71,7 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
|
||||
|
||||
public HoodieDLAClient(DLASyncConfig syncConfig, FileSystem fs) {
|
||||
super(syncConfig.basePath, syncConfig.assumeDatePartitioning, syncConfig.useFileListingFromMetadata,
|
||||
syncConfig.verifyMetadataFileListing, fs);
|
||||
syncConfig.verifyMetadataFileListing, false, fs);
|
||||
this.dlaConfig = syncConfig;
|
||||
try {
|
||||
this.partitionValueExtractor =
|
||||
|
||||
@@ -120,6 +120,9 @@ public class HiveSyncConfig implements Serializable {
|
||||
@Parameter(names = {"--spark-schema-length-threshold"}, description = "The maximum length allowed in a single cell when storing additional schema information in Hive's metastore.")
|
||||
public int sparkSchemaLengthThreshold = 4000;
|
||||
|
||||
@Parameter(names = {"--with-operation-field"}, description = "Whether to include the '_hoodie_operation' field in the metadata fields")
|
||||
public Boolean withOperationField = false;
|
||||
|
||||
// enhance the similar function in child class
|
||||
public static HiveSyncConfig copy(HiveSyncConfig cfg) {
|
||||
HiveSyncConfig newConfig = new HiveSyncConfig();
|
||||
@@ -143,6 +146,7 @@ public class HiveSyncConfig implements Serializable {
|
||||
newConfig.batchSyncNum = cfg.batchSyncNum;
|
||||
newConfig.syncAsSparkDataSourceTable = cfg.syncAsSparkDataSourceTable;
|
||||
newConfig.sparkSchemaLengthThreshold = cfg.sparkSchemaLengthThreshold;
|
||||
newConfig.withOperationField = cfg.withOperationField;
|
||||
return newConfig;
|
||||
}
|
||||
|
||||
@@ -174,6 +178,7 @@ public class HiveSyncConfig implements Serializable {
|
||||
+ ", createManagedTable=" + createManagedTable
|
||||
+ ", syncAsSparkDataSourceTable=" + syncAsSparkDataSourceTable
|
||||
+ ", sparkSchemaLengthThreshold=" + sparkSchemaLengthThreshold
|
||||
+ ", withOperationField=" + withOperationField
|
||||
+ '}';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,7 +62,7 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
|
||||
private final HiveSyncConfig syncConfig;
|
||||
|
||||
public HoodieHiveClient(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
|
||||
super(cfg.basePath, cfg.assumeDatePartitioning, cfg.useFileListingFromMetadata, cfg.verifyMetadataFileListing, fs);
|
||||
super(cfg.basePath, cfg.assumeDatePartitioning, cfg.useFileListingFromMetadata, cfg.verifyMetadataFileListing, cfg.withOperationField, fs);
|
||||
this.syncConfig = cfg;
|
||||
|
||||
// Support JDBC, HiveQL and metastore based implementations for backwards compatibility. Future users should
|
||||
|
||||
@@ -51,19 +51,21 @@ public abstract class AbstractSyncHoodieClient {
|
||||
protected final HoodieTableMetaClient metaClient;
|
||||
protected final HoodieTableType tableType;
|
||||
protected final FileSystem fs;
|
||||
private String basePath;
|
||||
private boolean assumeDatePartitioning;
|
||||
private boolean useFileListingFromMetadata;
|
||||
private boolean verifyMetadataFileListing;
|
||||
private final String basePath;
|
||||
private final boolean assumeDatePartitioning;
|
||||
private final boolean useFileListingFromMetadata;
|
||||
private final boolean verifyMetadataFileListing;
|
||||
private final boolean withOperationField;
|
||||
|
||||
public AbstractSyncHoodieClient(String basePath, boolean assumeDatePartitioning, boolean useFileListingFromMetadata,
|
||||
boolean verifyMetadataFileListing, FileSystem fs) {
|
||||
boolean verifyMetadataFileListing, boolean withOperationField, FileSystem fs) {
|
||||
this.metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
|
||||
this.tableType = metaClient.getTableType();
|
||||
this.basePath = basePath;
|
||||
this.assumeDatePartitioning = assumeDatePartitioning;
|
||||
this.useFileListingFromMetadata = useFileListingFromMetadata;
|
||||
this.verifyMetadataFileListing = verifyMetadataFileListing;
|
||||
this.withOperationField = withOperationField;
|
||||
this.fs = fs;
|
||||
}
|
||||
|
||||
@@ -139,7 +141,11 @@ public abstract class AbstractSyncHoodieClient {
|
||||
*/
|
||||
public MessageType getDataSchema() {
|
||||
try {
|
||||
return new TableSchemaResolver(metaClient).getTableParquetSchema();
|
||||
if (withOperationField) {
|
||||
return new TableSchemaResolver(metaClient, true).getTableParquetSchema();
|
||||
} else {
|
||||
return new TableSchemaResolver(metaClient).getTableParquetSchema();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new HoodieSyncException("Failed to read data schema", e);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user