1
0

[HUDI-1055] Remove hardcoded parquet in tests (#2740)

* Remove hardcoded parquet in tests
* Use DataFileUtils.getInstance
* Rename DataFileUtils to BaseFileUtils

Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
TeRS-K
2021-05-11 13:01:45 -04:00
committed by GitHub
parent ac72470e10
commit be9db2c4f5
42 changed files with 359 additions and 218 deletions

View File

@@ -27,7 +27,7 @@ import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.ParquetUtils;
import org.apache.hudi.common.util.BaseFileUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.exception.HoodieException;
@@ -221,6 +221,7 @@ public class BucketAssignFunction<K, I, O extends HoodieRecord<?>>
private void loadRecords(String partitionPath) throws Exception {
LOG.info("Start loading records under partition {} into the index state", partitionPath);
HoodieTable<?, ?, ?, ?> hoodieTable = bucketAssigner.getTable();
BaseFileUtils fileUtils = BaseFileUtils.getInstance(hoodieTable.getBaseFileFormat());
List<HoodieBaseFile> latestBaseFiles =
HoodieIndexUtils.getLatestBaseFilesForPartition(partitionPath, hoodieTable);
final int parallelism = getRuntimeContext().getNumberOfParallelSubtasks();
@@ -230,7 +231,7 @@ public class BucketAssignFunction<K, I, O extends HoodieRecord<?>>
final List<HoodieKey> hoodieKeys;
try {
hoodieKeys =
ParquetUtils.fetchRecordKeyPartitionPathFromParquet(hadoopConf, new Path(baseFile.getPath()));
fileUtils.fetchRecordKeyPartitionPath(hadoopConf, new Path(baseFile.getPath()));
} catch (Exception e) {
// in case an empty parquet file was left behind when the pipeline
// crashed unexpectedly.