[HUDI-1529] Add block size to the FileStatus objects returned from metadata table to avoid too many file splits (#2451)
This commit is contained in:
@@ -202,7 +202,7 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
|
||||
throw new HoodieMetadataException("Metadata record for partition " + partitionName + " is inconsistent: "
|
||||
+ hoodieRecord.get().getData());
|
||||
}
|
||||
- statuses = hoodieRecord.get().getData().getFileStatuses(partitionPath);
+ statuses = hoodieRecord.get().getData().getFileStatuses(hadoopConf.get(), partitionPath);
|
||||
}
|
||||
|
||||
if (validateLookups) {
|
||||
|
||||
@@ -29,7 +29,9 @@ import org.apache.hudi.exception.HoodieMetadataException;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
+ import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
+ import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.io.IOException;
|
||||
@@ -177,10 +179,12 @@ public class HoodieMetadataPayload implements HoodieRecordPayload<HoodieMetadata
|
||||
/**
|
||||
* Returns the files added as part of this record.
|
||||
*/
|
||||
- public FileStatus[] getFileStatuses(Path partitionPath) {
+ public FileStatus[] getFileStatuses(Configuration hadoopConf, Path partitionPath) throws IOException {
+ FileSystem fs = partitionPath.getFileSystem(hadoopConf);
+ long blockSize = fs.getDefaultBlockSize(partitionPath);
|
||||
return filterFileInfoEntries(false)
|
||||
- .map(e -> new FileStatus(e.getValue().getSize(), false, 0, 0, 0, 0, null, null, null,
- new Path(partitionPath, e.getKey())))
+ .map(e -> new FileStatus(e.getValue().getSize(), false, 0, blockSize, 0, 0,
+ null, null, null, new Path(partitionPath, e.getKey())))
|
||||
.toArray(FileStatus[]::new);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user