1
0

[HUDI-2439] Remove SparkBoundedInMemoryExecutor (#4860)

This commit is contained in:
Raymond Xu
2022-02-26 05:02:12 -08:00
committed by GitHub
parent 1379300b5b
commit c77b2591d0
14 changed files with 99 additions and 113 deletions

View File

@@ -18,18 +18,10 @@
package org.apache.hudi.hadoop.realtime;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.table.log.HoodieUnMergedLogRecordScanner;
import org.apache.hudi.common.util.DefaultSizeEstimator;
import org.apache.hudi.common.util.Functions;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor;
import org.apache.hudi.common.util.queue.BoundedInMemoryQueueProducer;
@@ -40,6 +32,18 @@ import org.apache.hudi.hadoop.SafeParquetRecordReaderWrapper;
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.Function;
class RealtimeUnmergedRecordReader extends AbstractRealtimeRecordReader
implements RecordReader<NullWritable, ArrayWritable> {
@@ -74,7 +78,7 @@ class RealtimeUnmergedRecordReader extends AbstractRealtimeRecordReader
this.parquetRecordsIterator = new RecordReaderValueIterator<>(this.parquetReader);
this.executor = new BoundedInMemoryExecutor<>(
HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes(jobConf), getParallelProducers(),
Option.empty(), x -> x, new DefaultSizeEstimator<>());
Option.empty(), Function.identity(), new DefaultSizeEstimator<>(), Functions.noop());
// Consumer of this record reader
this.iterator = this.executor.getQueue().iterator();
this.logRecordScanner = HoodieUnMergedLogRecordScanner.newBuilder()