1
0

[HUDI-2558] Fix clustering failure when sort columns contain null values (#4404)

This commit is contained in:
harshal
2022-01-03 12:19:43 +05:30
committed by GitHub
parent 0273f2e65d
commit 2b2ae34cb9
2 changed files with 33 additions and 1 deletions

View File

@@ -22,6 +22,7 @@ import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.config.SerializableSchema;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.BulkInsertPartitioner;
@@ -55,8 +56,17 @@ public class RDDCustomColumnsSortPartitioner<T extends HoodieRecordPayload>
final String[] sortColumns = this.sortColumnNames;
final SerializableSchema schema = this.serializableSchema;
return records.sortBy(
record -> HoodieAvroUtils.getRecordColumnValues(record, sortColumns, schema),
record -> {
  // Sort key: the values of the configured sort columns for this record.
  Object recordValue = HoodieAvroUtils.getRecordColumnValues(record, sortColumns, schema);
  // A null key is replaced with the empty string so the sort has a non-null,
  // comparable key; with ascending order this places such records first.
  if (recordValue == null) {
    return StringUtils.EMPTY_STRING;
  }
  // Sort on the extracted column values, not on the string form of the whole
  // record; otherwise the requested sort columns would be ignored.
  return StringUtils.objToString(recordValue);
},
true, outputSparkPartitions);
}
@Override