1
0

[HUDI-1301] Support schema versions when querying a Hudi dataset in Spark INCREMENTAL mode. (#2125)

This commit is contained in:
lw0090
2020-10-10 20:53:41 +08:00
committed by GitHub
parent eafd7bf289
commit 585ce0094d
3 changed files with 51 additions and 10 deletions

View File

@@ -175,20 +175,45 @@ public class TableSchemaResolver {
* @throws Exception
*/
public Schema getTableAvroSchemaWithoutMetadataFields() throws Exception {
// NOTE(review): this declaration and the one two lines below both define schemaFromCommitMetadata.
// This looks like the pre-change and post-change lines of a diff shown together; only one of them
// can exist in compilable code -- confirm which one is live.
Option<Schema> schemaFromCommitMetadata = getTableSchemaFromCommitMetadata(false);
// Narrow the active timeline to completed commit instants.
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
// Ask for the schema stored in the commit metadata of the last completed instant, with
// includeMetadataFields = false.
// NOTE(review): lastInstant().get() is not guarded by a presence check here; presumably this fails
// when the timeline has no completed commit -- confirm.
Option<Schema> schemaFromCommitMetadata = getTableSchemaFromCommitMetadata(timeline.lastInstant().get(), false);
// Prefer the schema found in commit metadata; otherwise fall back to the schema returned by
// getTableAvroSchemaFromDataFile() with the metadata fields removed.
return schemaFromCommitMetadata.isPresent() ? schemaFromCommitMetadata.get() :
HoodieAvroUtils.removeMetadataFields(getTableAvroSchemaFromDataFile());
}
/**
 * Resolves the user data schema (Avro, without metadata fields) of a hoodie table for a given instant.
 *
 * <p>The schema stored in the commit metadata of {@code instant} is returned when it is present.
 * Otherwise the schema returned by {@code getTableAvroSchemaFromDataFile()} is used, with the
 * metadata fields removed. Note that this fallback does not take {@code instant} as an input.
 *
 * @param instant the instant whose commit metadata is consulted for the schema
 * @return Avro user data schema
 * @throws Exception
 */
public Schema getTableAvroSchemaWithoutMetadataFields(HoodieInstant instant) throws Exception {
  Option<Schema> committedSchema = getTableSchemaFromCommitMetadata(instant, false);
  if (committedSchema.isPresent()) {
    return committedSchema.get();
  }
  // No schema in the commit metadata: derive it from a data file and strip the metadata fields.
  return HoodieAvroUtils.removeMetadataFields(getTableAvroSchemaFromDataFile());
}
/**
 * Gets the schema for a hoodie table in Avro format from the HoodieCommitMetadata of the last
 * completed commit.
 *
 * @param includeMetadataFields passed through unchanged to the instant-based overload
 * @return Avro schema for this table, or an empty Option when the commits timeline contains no
 *         completed instant
 */
private Option<Schema> getTableSchemaFromCommitMetadata(boolean includeMetadataFields) {
  HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
  if (!timeline.lastInstant().isPresent()) {
    // No completed commit yet, so there is no commit metadata to read a schema from. Report
    // "absent" instead of failing on get(); callers already check isPresent() and fall back.
    return Option.empty();
  }
  return getTableSchemaFromCommitMetadata(timeline.lastInstant().get(), includeMetadataFields);
}
/**
* Gets the schema for a hoodie table in Avro format from the HoodieCommitMetadata of the instant.
*
* @return Avro schema for this table
*/
private Option<Schema> getTableSchemaFromCommitMetadata(HoodieInstant instant, boolean includeMetadataFields) {
try {
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
byte[] data = timeline.getInstantDetails(timeline.lastInstant().get()).get();
byte[] data = timeline.getInstantDetails(instant).get();
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class);
String existingSchemaStr = metadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY);