1
0

[HUDI-1479] Use HoodieEngineContext to parallelize fetching of partiton paths (#2417)

* [HUDI-1479] Use HoodieEngineContext to parallelize fetching of partition paths

* Adding testClass for FileSystemBackedTableMetadata

Co-authored-by: Nishith Agarwal <nagarwal@uber.com>
This commit is contained in:
Udit Mehrotra
2021-01-10 21:19:52 -08:00
committed by GitHub
parent 23e93d05c0
commit 7ce3ac778e
38 changed files with 509 additions and 100 deletions

View File

@@ -19,6 +19,7 @@
package org.apache.hudi.integ.testsuite.reader;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
@@ -86,7 +87,8 @@ public class DFSHoodieDatasetInputReader extends DFSDeltaInputReader {
// Using FSUtils.getFS here instead of metaClient.getFS() since we dont want to count these listStatus
// calls in metrics as they are not part of normal HUDI operation.
FileSystem fs = FSUtils.getFs(metaClient.getBasePath(), metaClient.getHadoopConf());
List<String> partitionPaths = FSUtils.getAllPartitionPaths(fs, metaClient.getBasePath(),
HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
List<String> partitionPaths = FSUtils.getAllPartitionPaths(engineContext, fs, metaClient.getBasePath(),
HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS, HoodieMetadataConfig.DEFAULT_METADATA_VALIDATE, false);
// Sort partition so we can pick last N partitions by default
Collections.sort(partitionPaths);