Support union mode in HoodieRealtimeRecordReader for pure insert workloads
Also replace the BufferedIteratorPayload abstraction with function passing
This commit is contained in:
committed by
vinoth chandar
parent
93f345a032
commit
dfc0c61eb7
@@ -16,39 +16,35 @@
|
||||
|
||||
package com.uber.hoodie.func;
|
||||
|
||||
import static com.uber.hoodie.func.LazyInsertIterable.getTransformFunction;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import com.uber.hoodie.common.HoodieTestDataGenerator;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||
import com.uber.hoodie.common.util.queue.BoundedInMemoryQueueConsumer;
|
||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.function.Function;
|
||||
import java.util.Optional;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class TestBufferedIteratorExecutor {
|
||||
public class TestBoundedInMemoryExecutor {
|
||||
|
||||
private final HoodieTestDataGenerator hoodieTestDataGenerator = new HoodieTestDataGenerator();
|
||||
private final String commitTime = HoodieActiveTimeline.createNewCommitTime();
|
||||
private ExecutorService executorService = null;
|
||||
|
||||
@Before
|
||||
public void beforeTest() {
|
||||
this.executorService = Executors.newFixedThreadPool(1);
|
||||
}
|
||||
private SparkBoundedInMemoryExecutor<HoodieRecord,
|
||||
Tuple2<HoodieRecord, Optional<IndexedRecord>>, Integer> executor = null;
|
||||
|
||||
@After
|
||||
public void afterTest() {
|
||||
if (this.executorService != null) {
|
||||
this.executorService.shutdownNow();
|
||||
this.executorService = null;
|
||||
if (this.executor != null) {
|
||||
this.executor.shutdownNow();
|
||||
this.executor = null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -59,21 +55,32 @@ public class TestBufferedIteratorExecutor {
|
||||
|
||||
HoodieWriteConfig hoodieWriteConfig = mock(HoodieWriteConfig.class);
|
||||
when(hoodieWriteConfig.getWriteBufferLimitBytes()).thenReturn(1024);
|
||||
BufferedIteratorExecutor bufferedIteratorExecutor = new BufferedIteratorExecutor(hoodieWriteConfig,
|
||||
hoodieRecords.iterator(), LazyInsertIterable.bufferedItrPayloadTransform(HoodieTestDataGenerator.avroSchema),
|
||||
executorService);
|
||||
Function<BufferedIterator, Integer> function = (bufferedIterator) -> {
|
||||
Integer count = 0;
|
||||
while (bufferedIterator.hasNext()) {
|
||||
count++;
|
||||
bufferedIterator.next();
|
||||
}
|
||||
return count;
|
||||
};
|
||||
Future<Integer> future = bufferedIteratorExecutor.start(function);
|
||||
BoundedInMemoryQueueConsumer<Tuple2<HoodieRecord, Optional<IndexedRecord>>, Integer> consumer =
|
||||
new BoundedInMemoryQueueConsumer<Tuple2<HoodieRecord, Optional<IndexedRecord>>, Integer>() {
|
||||
|
||||
private int count = 0;
|
||||
|
||||
@Override
|
||||
protected void consumeOneRecord(Tuple2<HoodieRecord, Optional<IndexedRecord>> record) {
|
||||
count++;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void finish() {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Integer getResult() {
|
||||
return count;
|
||||
}
|
||||
};
|
||||
|
||||
executor = new SparkBoundedInMemoryExecutor(hoodieWriteConfig,
|
||||
hoodieRecords.iterator(), consumer, getTransformFunction(HoodieTestDataGenerator.avroSchema));
|
||||
int result = executor.execute();
|
||||
// It should buffer and write 100 records
|
||||
Assert.assertEquals((int) future.get(), 100);
|
||||
Assert.assertEquals(result, 100);
|
||||
// There should be no remaining records in the buffer
|
||||
Assert.assertFalse(bufferedIteratorExecutor.isRemaining());
|
||||
Assert.assertFalse(executor.isRemaining());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,336 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.func;
|
||||
|
||||
import static com.uber.hoodie.func.LazyInsertIterable.getTransformFunction;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import com.uber.hoodie.common.HoodieTestDataGenerator;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||
import com.uber.hoodie.common.util.DefaultSizeEstimator;
|
||||
import com.uber.hoodie.common.util.SizeEstimator;
|
||||
import com.uber.hoodie.common.util.queue.BoundedInMemoryQueue;
|
||||
import com.uber.hoodie.common.util.queue.BoundedInMemoryQueueProducer;
|
||||
import com.uber.hoodie.common.util.queue.FunctionBasedQueueProducer;
|
||||
import com.uber.hoodie.common.util.queue.IteratorBasedQueueProducer;
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class TestBoundedInMemoryQueue {
|
||||
|
||||
private final HoodieTestDataGenerator hoodieTestDataGenerator = new HoodieTestDataGenerator();
|
||||
private final String commitTime = HoodieActiveTimeline.createNewCommitTime();
|
||||
private ExecutorService executorService = null;
|
||||
|
||||
@Before
|
||||
public void beforeTest() {
|
||||
this.executorService = Executors.newFixedThreadPool(2);
|
||||
}
|
||||
|
||||
@After
|
||||
public void afterTest() {
|
||||
if (this.executorService != null) {
|
||||
this.executorService.shutdownNow();
|
||||
this.executorService = null;
|
||||
}
|
||||
}
|
||||
|
||||
// Test to ensure that we are reading all records from queue iterator in the same order
|
||||
// without any exceptions.
|
||||
@SuppressWarnings("unchecked")
|
||||
@Test(timeout = 60000)
|
||||
public void testRecordReading() throws Exception {
|
||||
final int numRecords = 128;
|
||||
final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
|
||||
final BoundedInMemoryQueue<HoodieRecord,
|
||||
Tuple2<HoodieRecord, Optional<IndexedRecord>>> queue = new BoundedInMemoryQueue(FileUtils.ONE_KB,
|
||||
getTransformFunction(HoodieTestDataGenerator.avroSchema));
|
||||
// Produce
|
||||
Future<Boolean> resFuture =
|
||||
executorService.submit(() -> {
|
||||
new IteratorBasedQueueProducer<>(hoodieRecords.iterator()).produce(queue);
|
||||
queue.close();
|
||||
return true;
|
||||
});
|
||||
final Iterator<HoodieRecord> originalRecordIterator = hoodieRecords.iterator();
|
||||
int recordsRead = 0;
|
||||
while (queue.iterator().hasNext()) {
|
||||
final HoodieRecord originalRecord = originalRecordIterator.next();
|
||||
final Optional<IndexedRecord> originalInsertValue = originalRecord.getData()
|
||||
.getInsertValue(HoodieTestDataGenerator.avroSchema);
|
||||
final Tuple2<HoodieRecord, Optional<IndexedRecord>> payload = queue.iterator().next();
|
||||
// Ensure that record ordering is guaranteed.
|
||||
Assert.assertEquals(originalRecord, payload._1());
|
||||
// cached insert value matches the expected insert value.
|
||||
Assert.assertEquals(originalInsertValue,
|
||||
payload._1().getData().getInsertValue(HoodieTestDataGenerator.avroSchema));
|
||||
recordsRead++;
|
||||
}
|
||||
Assert.assertFalse(queue.iterator().hasNext() || originalRecordIterator.hasNext());
|
||||
// all the records should be read successfully.
|
||||
Assert.assertEquals(numRecords, recordsRead);
|
||||
// should not throw any exceptions.
|
||||
resFuture.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test to ensure that we are reading all records from queue iterator when we have multiple producers
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
@Test(timeout = 60000)
|
||||
public void testCompositeProducerRecordReading() throws Exception {
|
||||
final int numRecords = 1000;
|
||||
final int numProducers = 40;
|
||||
final List<List<HoodieRecord>> recs = new ArrayList<>();
|
||||
|
||||
final BoundedInMemoryQueue<HoodieRecord, Tuple2<HoodieRecord, Optional<IndexedRecord>>> queue =
|
||||
new BoundedInMemoryQueue(FileUtils.ONE_KB, getTransformFunction(HoodieTestDataGenerator.avroSchema));
|
||||
|
||||
// Record Key to <Producer Index, Rec Index within a producer>
|
||||
Map<String, Tuple2<Integer, Integer>> keyToProducerAndIndexMap = new HashMap<>();
|
||||
|
||||
for (int i = 0; i < numProducers; i++) {
|
||||
List<HoodieRecord> pRecs = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
|
||||
int j = 0;
|
||||
for (HoodieRecord r : pRecs) {
|
||||
Assert.assertTrue(!keyToProducerAndIndexMap.containsKey(r.getRecordKey()));
|
||||
keyToProducerAndIndexMap.put(r.getRecordKey(), new Tuple2<>(i, j));
|
||||
j++;
|
||||
}
|
||||
recs.add(pRecs);
|
||||
}
|
||||
|
||||
List<BoundedInMemoryQueueProducer<HoodieRecord>> producers = new ArrayList<>();
|
||||
for (int i = 0; i < recs.size(); i++) {
|
||||
final List<HoodieRecord> r = recs.get(i);
|
||||
// Alternate between pull and push based iterators
|
||||
if (i % 2 == 0) {
|
||||
producers.add(new IteratorBasedQueueProducer<>(r.iterator()));
|
||||
} else {
|
||||
producers.add(new FunctionBasedQueueProducer<HoodieRecord>((buf) -> {
|
||||
Iterator<HoodieRecord> itr = r.iterator();
|
||||
while (itr.hasNext()) {
|
||||
try {
|
||||
buf.insertRecord(itr.next());
|
||||
} catch (Exception e) {
|
||||
throw new HoodieException(e);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
final List<Future<Boolean>> futureList = producers.stream().map(producer -> {
|
||||
return executorService.submit(() -> {
|
||||
producer.produce(queue);
|
||||
return true;
|
||||
});
|
||||
}).collect(Collectors.toList());
|
||||
|
||||
// Close queue
|
||||
Future<Boolean> closeFuture = executorService.submit(() -> {
|
||||
try {
|
||||
for (Future f : futureList) {
|
||||
f.get();
|
||||
}
|
||||
queue.close();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
// Used to ensure that consumer sees the records generated by a single producer in FIFO order
|
||||
Map<Integer, Integer> lastSeenMap = IntStream.range(0, numProducers).boxed()
|
||||
.collect(Collectors.toMap(Function.identity(), x -> -1));
|
||||
Map<Integer, Integer> countMap = IntStream.range(0, numProducers).boxed()
|
||||
.collect(Collectors.toMap(Function.identity(), x -> 0));
|
||||
|
||||
// Read recs and ensure we have covered all producer recs.
|
||||
while (queue.iterator().hasNext()) {
|
||||
final Tuple2<HoodieRecord, Optional<IndexedRecord>> payload = queue.iterator().next();
|
||||
final HoodieRecord rec = payload._1();
|
||||
Tuple2<Integer, Integer> producerPos = keyToProducerAndIndexMap.get(rec.getRecordKey());
|
||||
Integer lastSeenPos = lastSeenMap.get(producerPos._1());
|
||||
countMap.put(producerPos._1(), countMap.get(producerPos._1()) + 1);
|
||||
lastSeenMap.put(producerPos._1(), lastSeenPos + 1);
|
||||
// Ensure we are seeing the next record generated
|
||||
Assert.assertEquals(lastSeenPos + 1, producerPos._2().intValue());
|
||||
}
|
||||
|
||||
for (int i = 0; i < numProducers; i++) {
|
||||
// Ensure we have seen all the records for each producers
|
||||
Assert.assertEquals(Integer.valueOf(numRecords), countMap.get(i));
|
||||
}
|
||||
|
||||
//Ensure Close future is done
|
||||
closeFuture.get();
|
||||
}
|
||||
|
||||
// Test to ensure that record queueing is throttled when we hit memory limit.
|
||||
@SuppressWarnings("unchecked")
|
||||
@Test(timeout = 60000)
|
||||
public void testMemoryLimitForBuffering() throws Exception {
|
||||
final int numRecords = 128;
|
||||
final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
|
||||
// maximum number of records to keep in memory.
|
||||
final int recordLimit = 5;
|
||||
final SizeEstimator<Tuple2<HoodieRecord, Optional<IndexedRecord>>> sizeEstimator =
|
||||
new DefaultSizeEstimator<>();
|
||||
final long objSize = sizeEstimator.sizeEstimate(
|
||||
getTransformFunction(HoodieTestDataGenerator.avroSchema).apply(hoodieRecords.get(0)));
|
||||
final long memoryLimitInBytes = recordLimit * objSize;
|
||||
final BoundedInMemoryQueue<HoodieRecord, Tuple2<HoodieRecord, Optional<IndexedRecord>>> queue =
|
||||
new BoundedInMemoryQueue(memoryLimitInBytes,
|
||||
getTransformFunction(HoodieTestDataGenerator.avroSchema));
|
||||
|
||||
// Produce
|
||||
Future<Boolean> resFuture = executorService.submit(() -> {
|
||||
new IteratorBasedQueueProducer<>(hoodieRecords.iterator()).produce(queue);
|
||||
return true;
|
||||
});
|
||||
// waiting for permits to expire.
|
||||
while (!isQueueFull(queue.rateLimiter)) {
|
||||
Thread.sleep(10);
|
||||
}
|
||||
Assert.assertEquals(0, queue.rateLimiter.availablePermits());
|
||||
Assert.assertEquals(recordLimit, queue.currentRateLimit);
|
||||
Assert.assertEquals(recordLimit, queue.size());
|
||||
Assert.assertEquals(recordLimit - 1, queue.samplingRecordCounter.get());
|
||||
|
||||
// try to read 2 records.
|
||||
Assert.assertEquals(hoodieRecords.get(0), queue.iterator().next()._1());
|
||||
Assert.assertEquals(hoodieRecords.get(1), queue.iterator().next()._1());
|
||||
|
||||
// waiting for permits to expire.
|
||||
while (!isQueueFull(queue.rateLimiter)) {
|
||||
Thread.sleep(10);
|
||||
}
|
||||
// No change is expected in rate limit or number of queued records. We only expect
|
||||
// queueing thread to read
|
||||
// 2 more records into the queue.
|
||||
Assert.assertEquals(0, queue.rateLimiter.availablePermits());
|
||||
Assert.assertEquals(recordLimit, queue.currentRateLimit);
|
||||
Assert.assertEquals(recordLimit, queue.size());
|
||||
Assert.assertEquals(recordLimit - 1 + 2, queue.samplingRecordCounter.get());
|
||||
}
|
||||
|
||||
// Test to ensure that exception in either queueing thread or BufferedIterator-reader thread
|
||||
// is propagated to
|
||||
// another thread.
|
||||
@SuppressWarnings("unchecked")
|
||||
@Test(timeout = 60000)
|
||||
public void testException() throws Exception {
|
||||
final int numRecords = 256;
|
||||
final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
|
||||
final SizeEstimator<Tuple2<HoodieRecord, Optional<IndexedRecord>>> sizeEstimator =
|
||||
new DefaultSizeEstimator<>();
|
||||
// queue memory limit
|
||||
final long objSize = sizeEstimator.sizeEstimate(
|
||||
getTransformFunction(HoodieTestDataGenerator.avroSchema).apply(hoodieRecords.get(0)));
|
||||
final long memoryLimitInBytes = 4 * objSize;
|
||||
|
||||
// first let us throw exception from queueIterator reader and test that queueing thread
|
||||
// stops and throws
|
||||
// correct exception back.
|
||||
BoundedInMemoryQueue<HoodieRecord, Tuple2<HoodieRecord, Optional<IndexedRecord>>> queue1 =
|
||||
new BoundedInMemoryQueue(memoryLimitInBytes, getTransformFunction(HoodieTestDataGenerator.avroSchema));
|
||||
|
||||
// Produce
|
||||
Future<Boolean> resFuture = executorService.submit(() -> {
|
||||
new IteratorBasedQueueProducer<>(hoodieRecords.iterator()).produce(queue1);
|
||||
return true;
|
||||
});
|
||||
|
||||
// waiting for permits to expire.
|
||||
while (!isQueueFull(queue1.rateLimiter)) {
|
||||
Thread.sleep(10);
|
||||
}
|
||||
// notify queueing thread of an exception and ensure that it exits.
|
||||
final Exception e = new Exception("Failing it :)");
|
||||
queue1.markAsFailed(e);
|
||||
try {
|
||||
resFuture.get();
|
||||
Assert.fail("exception is expected");
|
||||
} catch (ExecutionException e1) {
|
||||
Assert.assertEquals(HoodieException.class, e1.getCause().getClass());
|
||||
Assert.assertEquals(e, e1.getCause().getCause());
|
||||
}
|
||||
|
||||
// second let us raise an exception while doing record queueing. this exception should get
|
||||
// propagated to
|
||||
// queue iterator reader.
|
||||
final RuntimeException expectedException = new RuntimeException("failing record reading");
|
||||
final Iterator<HoodieRecord> mockHoodieRecordsIterator = mock(Iterator.class);
|
||||
when(mockHoodieRecordsIterator.hasNext()).thenReturn(true);
|
||||
when(mockHoodieRecordsIterator.next()).thenThrow(expectedException);
|
||||
BoundedInMemoryQueue<HoodieRecord, Tuple2<HoodieRecord, Optional<IndexedRecord>>> queue2 =
|
||||
new BoundedInMemoryQueue(memoryLimitInBytes, getTransformFunction(HoodieTestDataGenerator.avroSchema));
|
||||
|
||||
// Produce
|
||||
Future<Boolean> res = executorService.submit(() -> {
|
||||
try {
|
||||
new IteratorBasedQueueProducer<>(mockHoodieRecordsIterator).produce(queue2);
|
||||
} catch (Exception ex) {
|
||||
queue2.markAsFailed(ex);
|
||||
throw ex;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
try {
|
||||
queue2.iterator().hasNext();
|
||||
Assert.fail("exception is expected");
|
||||
} catch (Exception e1) {
|
||||
Assert.assertEquals(expectedException, e1.getCause());
|
||||
}
|
||||
// queueing thread should also have exited. make sure that it is not running.
|
||||
try {
|
||||
res.get();
|
||||
Assert.fail("exception is expected");
|
||||
} catch (ExecutionException e2) {
|
||||
Assert.assertEquals(expectedException, e2.getCause());
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isQueueFull(Semaphore rateLimiter) {
|
||||
return (rateLimiter.availablePermits() == 0 && rateLimiter.hasQueuedThreads());
|
||||
}
|
||||
}
|
||||
@@ -1,203 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.func;
|
||||
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import com.uber.hoodie.common.HoodieTestDataGenerator;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
import com.uber.hoodie.func.payload.AbstractBufferedIteratorPayload;
|
||||
import com.uber.hoodie.func.payload.HoodieRecordBufferedIteratorPayload;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.util.SizeEstimator;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestBufferedIterator {
|
||||
|
||||
private final HoodieTestDataGenerator hoodieTestDataGenerator = new HoodieTestDataGenerator();
|
||||
private final String commitTime = HoodieActiveTimeline.createNewCommitTime();
|
||||
private ExecutorService recordReader = null;
|
||||
|
||||
@Before
|
||||
public void beforeTest() {
|
||||
this.recordReader = Executors.newFixedThreadPool(1);
|
||||
}
|
||||
|
||||
@After
|
||||
public void afterTest() {
|
||||
if (this.recordReader != null) {
|
||||
this.recordReader.shutdownNow();
|
||||
this.recordReader = null;
|
||||
}
|
||||
}
|
||||
|
||||
// Test to ensure that we are reading all records from buffered iterator in the same order
|
||||
// without any exceptions.
|
||||
@Test(timeout = 60000)
|
||||
public void testRecordReading() throws IOException, ExecutionException, InterruptedException {
|
||||
final int numRecords = 128;
|
||||
final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
|
||||
final BufferedIterator bufferedIterator = new BufferedIterator(hoodieRecords.iterator(), FileUtils.ONE_KB,
|
||||
LazyInsertIterable.bufferedItrPayloadTransform(HoodieTestDataGenerator.avroSchema));
|
||||
Future<Boolean> result = recordReader.submit(() -> {
|
||||
bufferedIterator.startBuffering();
|
||||
return true;
|
||||
});
|
||||
final Iterator<HoodieRecord> originalRecordIterator = hoodieRecords.iterator();
|
||||
int recordsRead = 0;
|
||||
while (bufferedIterator.hasNext()) {
|
||||
final HoodieRecord originalRecord = originalRecordIterator.next();
|
||||
final Optional<IndexedRecord> originalInsertValue = originalRecord.getData()
|
||||
.getInsertValue(HoodieTestDataGenerator.avroSchema);
|
||||
final HoodieRecordBufferedIteratorPayload payload = (HoodieRecordBufferedIteratorPayload) bufferedIterator.next();
|
||||
// Ensure that record ordering is guaranteed.
|
||||
Assert.assertEquals(originalRecord, payload.getInputPayload());
|
||||
// cached insert value matches the expected insert value.
|
||||
Assert.assertEquals(originalInsertValue,
|
||||
((HoodieRecord) payload.getInputPayload()).getData().getInsertValue(HoodieTestDataGenerator.avroSchema));
|
||||
recordsRead++;
|
||||
}
|
||||
Assert.assertFalse(bufferedIterator.hasNext() || originalRecordIterator.hasNext());
|
||||
// all the records should be read successfully.
|
||||
Assert.assertEquals(numRecords, recordsRead);
|
||||
// should not throw any exceptions.
|
||||
Assert.assertTrue(result.get());
|
||||
}
|
||||
|
||||
// Test to ensure that record buffering is throttled when we hit memory limit.
|
||||
@Test(timeout = 60000)
|
||||
public void testMemoryLimitForBuffering() throws IOException, InterruptedException {
|
||||
final int numRecords = 128;
|
||||
final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
|
||||
// maximum number of records to keep in memory.
|
||||
final int recordLimit = 5;
|
||||
final long memoryLimitInBytes = recordLimit * SizeEstimator.estimate(hoodieRecords.get(0));
|
||||
final BufferedIterator<HoodieRecord, AbstractBufferedIteratorPayload> bufferedIterator =
|
||||
new BufferedIterator(hoodieRecords.iterator(), memoryLimitInBytes,
|
||||
LazyInsertIterable.bufferedItrPayloadTransform(HoodieTestDataGenerator.avroSchema));
|
||||
Future<Boolean> result = recordReader.submit(() -> {
|
||||
bufferedIterator.startBuffering();
|
||||
return true;
|
||||
});
|
||||
// waiting for permits to expire.
|
||||
while (!isQueueFull(bufferedIterator.rateLimiter)) {
|
||||
Thread.sleep(10);
|
||||
}
|
||||
Assert.assertEquals(0, bufferedIterator.rateLimiter.availablePermits());
|
||||
Assert.assertEquals(recordLimit, bufferedIterator.currentRateLimit);
|
||||
Assert.assertEquals(recordLimit, bufferedIterator.size());
|
||||
Assert.assertEquals(recordLimit - 1, bufferedIterator.samplingRecordCounter.get());
|
||||
|
||||
// try to read 2 records.
|
||||
Assert.assertEquals(hoodieRecords.get(0), bufferedIterator.next().getInputPayload());
|
||||
Assert.assertEquals(hoodieRecords.get(1), bufferedIterator.next().getInputPayload());
|
||||
|
||||
// waiting for permits to expire.
|
||||
while (!isQueueFull(bufferedIterator.rateLimiter)) {
|
||||
Thread.sleep(10);
|
||||
}
|
||||
// No change is expected in rate limit or number of buffered records. We only expect
|
||||
// buffering thread to read
|
||||
// 2 more records into the buffer.
|
||||
Assert.assertEquals(0, bufferedIterator.rateLimiter.availablePermits());
|
||||
Assert.assertEquals(recordLimit, bufferedIterator.currentRateLimit);
|
||||
Assert.assertEquals(recordLimit, bufferedIterator.size());
|
||||
Assert.assertEquals(recordLimit - 1 + 2, bufferedIterator.samplingRecordCounter.get());
|
||||
}
|
||||
|
||||
// Test to ensure that exception in either buffering thread or BufferedIterator-reader thread
|
||||
// is propagated to
|
||||
// another thread.
|
||||
@Test(timeout = 60000)
|
||||
public void testException() throws IOException, InterruptedException {
|
||||
final int numRecords = 256;
|
||||
final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
|
||||
// buffer memory limit
|
||||
final long memoryLimitInBytes = 4 * SizeEstimator.estimate(hoodieRecords.get(0));
|
||||
|
||||
// first let us throw exception from bufferIterator reader and test that buffering thread
|
||||
// stops and throws
|
||||
// correct exception back.
|
||||
BufferedIterator bufferedIterator1 = new BufferedIterator(hoodieRecords.iterator(), memoryLimitInBytes,
|
||||
LazyInsertIterable.bufferedItrPayloadTransform(HoodieTestDataGenerator.avroSchema));
|
||||
Future<Boolean> result = recordReader.submit(() -> {
|
||||
bufferedIterator1.startBuffering();
|
||||
return true;
|
||||
});
|
||||
// waiting for permits to expire.
|
||||
while (!isQueueFull(bufferedIterator1.rateLimiter)) {
|
||||
Thread.sleep(10);
|
||||
}
|
||||
// notify buffering thread of an exception and ensure that it exits.
|
||||
final Exception e = new Exception("Failing it :)");
|
||||
bufferedIterator1.markAsFailed(e);
|
||||
try {
|
||||
result.get();
|
||||
Assert.fail("exception is expected");
|
||||
} catch (ExecutionException e1) {
|
||||
Assert.assertEquals(HoodieException.class, e1.getCause().getClass());
|
||||
Assert.assertEquals(e, e1.getCause().getCause());
|
||||
}
|
||||
|
||||
// second let us raise an exception while doing record buffering. this exception should get
|
||||
// propagated to
|
||||
// buffered iterator reader.
|
||||
final RuntimeException expectedException = new RuntimeException("failing record reading");
|
||||
final Iterator<HoodieRecord> mockHoodieRecordsIterator = mock(Iterator.class);
|
||||
when(mockHoodieRecordsIterator.hasNext()).thenReturn(true);
|
||||
when(mockHoodieRecordsIterator.next()).thenThrow(expectedException);
|
||||
BufferedIterator bufferedIterator2 = new BufferedIterator(mockHoodieRecordsIterator, memoryLimitInBytes,
|
||||
LazyInsertIterable.bufferedItrPayloadTransform(HoodieTestDataGenerator.avroSchema));
|
||||
Future<Boolean> result2 = recordReader.submit(() -> {
|
||||
bufferedIterator2.startBuffering();
|
||||
return true;
|
||||
});
|
||||
try {
|
||||
bufferedIterator2.hasNext();
|
||||
Assert.fail("exception is expected");
|
||||
} catch (Exception e1) {
|
||||
Assert.assertEquals(expectedException, e1.getCause());
|
||||
}
|
||||
// buffering thread should also have exited. make sure that it is not running.
|
||||
try {
|
||||
result2.get();
|
||||
Assert.fail("exception is expected");
|
||||
} catch (ExecutionException e2) {
|
||||
Assert.assertEquals(expectedException, e2.getCause());
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isQueueFull(Semaphore rateLimiter) {
|
||||
return (rateLimiter.availablePermits() == 0 && rateLimiter.hasQueuedThreads());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user