1
0

[HUDI-2731] Make clustering work regardless of whether there are base… (#3970)

This commit is contained in:
Sagar Sumit
2021-11-19 21:39:08 +05:30
committed by GitHub
parent bf008762df
commit eba354e922
6 changed files with 148 additions and 43 deletions

View File

@@ -36,6 +36,7 @@ import org.apache.hudi.common.model.RewriteAvroPayload;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieClusteringException;
@@ -191,7 +192,6 @@ public abstract class MultipleSparkJobExecutionStrategy<T extends HoodieRecordPa
LOG.info("MaxMemoryPerCompaction run as part of clustering => " + maxMemoryPerCompaction);
try {
Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
HoodieFileReader<? extends IndexedRecord> baseFileReader = HoodieFileReaderFactory.getFileReader(table.getHadoopConf(), new Path(clusteringOp.getDataFilePath()));
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
.withFileSystem(table.getMetaClient().getFs())
.withBasePath(table.getMetaClient().getBasePath())
@@ -205,6 +205,9 @@ public abstract class MultipleSparkJobExecutionStrategy<T extends HoodieRecordPa
.withSpillableMapBasePath(config.getSpillableMapBasePath())
.build();
Option<HoodieFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath())
? Option.empty()
: Option.of(HoodieFileReaderFactory.getFileReader(table.getHadoopConf(), new Path(clusteringOp.getDataFilePath())));
HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
recordIterators.add(getFileSliceReader(baseFileReader, scanner, readerSchema,
tableConfig.getPayloadClass(),