1
0

[HUDI-4171] Fix non-partitioned tables with virtual keys in the read path (#5747)

- When the non-partitioned key generator is used with virtual keys, the read path could break because the partition path field may not exist.
This commit is contained in:
Sivabalan Narayanan
2022-06-06 15:48:21 -04:00
committed by GitHub
parent 21b903fddb
commit 7da97c8096
7 changed files with 87 additions and 22 deletions

View File

@@ -43,6 +43,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.hadoop.realtime.HoodieVirtualKeyInfo;
@@ -275,16 +276,16 @@ public class HoodieCopyOnWriteTableInputFormat extends HoodieTableInputFormat {
if (tableConfig.populateMetaFields()) {
return Option.empty();
}
TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(metaClient);
try {
Schema schema = tableSchemaResolver.getTableAvroSchema();
boolean isNonPartitionedKeyGen = StringUtils.isNullOrEmpty(tableConfig.getPartitionFieldProp());
return Option.of(
new HoodieVirtualKeyInfo(
tableConfig.getRecordKeyFieldProp(),
tableConfig.getPartitionFieldProp(),
isNonPartitionedKeyGen ? Option.empty() : Option.of(tableConfig.getPartitionFieldProp()),
schema.getField(tableConfig.getRecordKeyFieldProp()).pos(),
schema.getField(tableConfig.getPartitionFieldProp()).pos()));
isNonPartitionedKeyGen ? Option.empty() : Option.of(schema.getField(tableConfig.getPartitionFieldProp()).pos())));
} catch (Exception exception) {
throw new HoodieException("Fetching table schema failed with exception ", exception);
}

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.hadoop.realtime;
import org.apache.hudi.common.util.Option;
import java.io.Serializable;
/**
@@ -26,11 +28,11 @@ import java.io.Serializable;
public class HoodieVirtualKeyInfo implements Serializable {
private final String recordKeyField;
private final String partitionPathField;
private final Option<String> partitionPathField;
private final int recordKeyFieldIndex;
private final int partitionPathFieldIndex;
private final Option<Integer> partitionPathFieldIndex;
public HoodieVirtualKeyInfo(String recordKeyField, String partitionPathField, int recordKeyFieldIndex, int partitionPathFieldIndex) {
public HoodieVirtualKeyInfo(String recordKeyField, Option<String> partitionPathField, int recordKeyFieldIndex, Option<Integer> partitionPathFieldIndex) {
this.recordKeyField = recordKeyField;
this.partitionPathField = partitionPathField;
this.recordKeyFieldIndex = recordKeyFieldIndex;
@@ -41,7 +43,7 @@ public class HoodieVirtualKeyInfo implements Serializable {
return recordKeyField;
}
public String getPartitionPathField() {
public Option<String> getPartitionPathField() {
return partitionPathField;
}
@@ -49,7 +51,7 @@ public class HoodieVirtualKeyInfo implements Serializable {
return recordKeyFieldIndex;
}
public int getPartitionPathFieldIndex() {
public Option<Integer> getPartitionPathFieldIndex() {
return partitionPathFieldIndex;
}
@@ -57,9 +59,9 @@ public class HoodieVirtualKeyInfo implements Serializable {
public String toString() {
return "HoodieVirtualKeyInfo{"
+ "recordKeyField='" + recordKeyField + '\''
+ ", partitionPathField='" + partitionPathField + '\''
+ ", partitionPathField='" + (partitionPathField.isPresent() ? partitionPathField.get() : "null") + '\''
+ ", recordKeyFieldIndex=" + recordKeyFieldIndex
+ ", partitionPathFieldIndex=" + partitionPathFieldIndex
+ ", partitionPathFieldIndex=" + (partitionPathFieldIndex.isPresent() ? partitionPathFieldIndex.get() : "-1")
+ '}';
}
}

View File

@@ -107,9 +107,12 @@ public interface RealtimeSplit extends InputSplitWithLocationInfo {
} else {
InputSplitUtils.writeBoolean(true, out);
InputSplitUtils.writeString(virtualKeyInfoOpt.get().getRecordKeyField(), out);
InputSplitUtils.writeString(virtualKeyInfoOpt.get().getPartitionPathField(), out);
InputSplitUtils.writeString(String.valueOf(virtualKeyInfoOpt.get().getRecordKeyFieldIndex()), out);
InputSplitUtils.writeString(String.valueOf(virtualKeyInfoOpt.get().getPartitionPathFieldIndex()), out);
// Write a presence flag first so the reader knows whether the partition path
// field name and index follow in the stream.
InputSplitUtils.writeBoolean(virtualKeyInfoOpt.get().getPartitionPathField().isPresent(), out);
if (virtualKeyInfoOpt.get().getPartitionPathField().isPresent()) {
InputSplitUtils.writeString(virtualKeyInfoOpt.get().getPartitionPathField().get(), out);
// getPartitionPathFieldIndex() returns Option<Integer>; unwrap it before stringifying.
// Otherwise the Option wrapper's string form is written instead of the number, and the
// reader, which parses this value with Integer.parseInt, fails.
InputSplitUtils.writeString(String.valueOf(virtualKeyInfoOpt.get().getPartitionPathFieldIndex().get()), out);
}
}
}
@@ -130,9 +133,10 @@ public interface RealtimeSplit extends InputSplitWithLocationInfo {
boolean hoodieVirtualKeyPresent = InputSplitUtils.readBoolean(in);
if (hoodieVirtualKeyPresent) {
String recordKeyField = InputSplitUtils.readString(in);
String partitionPathField = InputSplitUtils.readString(in);
int recordFieldIndex = Integer.parseInt(InputSplitUtils.readString(in));
int partitionPathIndex = Integer.parseInt(InputSplitUtils.readString(in));
boolean isPartitionPathFieldPresent = InputSplitUtils.readBoolean(in);
Option<String> partitionPathField = isPartitionPathFieldPresent ? Option.of(InputSplitUtils.readString(in)) : Option.empty();
Option<Integer> partitionPathIndex = isPartitionPathFieldPresent ? Option.of(Integer.parseInt(InputSplitUtils.readString(in))) : Option.empty();
setVirtualKeyInfo(Option.of(new HoodieVirtualKeyInfo(recordKeyField, partitionPathField, recordFieldIndex, partitionPathIndex)));
}
}

View File

@@ -87,7 +87,9 @@ public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils {
} else {
HoodieVirtualKeyInfo hoodieVirtualKey = hoodieVirtualKeyInfo.get();
addProjectionField(configuration, hoodieVirtualKey.getRecordKeyField(), hoodieVirtualKey.getRecordKeyFieldIndex());
addProjectionField(configuration, hoodieVirtualKey.getPartitionPathField(), hoodieVirtualKey.getPartitionPathFieldIndex());
if (hoodieVirtualKey.getPartitionPathField().isPresent()) {
addProjectionField(configuration, hoodieVirtualKey.getPartitionPathField().get(), hoodieVirtualKey.getPartitionPathFieldIndex().get());
}
}
}
@@ -99,7 +101,8 @@ public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils {
&& readColNames.contains(HoodieRecord.PARTITION_PATH_METADATA_FIELD);
} else {
return readColNames.contains(hoodieVirtualKeyInfo.get().getRecordKeyField())
&& readColNames.contains(hoodieVirtualKeyInfo.get().getPartitionPathField());
&& (hoodieVirtualKeyInfo.get().getPartitionPathField().isPresent() ? readColNames.contains(hoodieVirtualKeyInfo.get().getPartitionPathField().get())
: true);
}
}