1
0

[HUDI-2875] Make HoodieParquetWriter Thread safe and memory executor exit gracefully (#4264)

This commit is contained in:
guanziyue
2022-05-06 04:49:34 +08:00
committed by GitHub
parent d794f4fbf9
commit abb4893b25
17 changed files with 121 additions and 12 deletions

View File

@@ -35,6 +35,8 @@ import org.apache.avro.generic.GenericRecord;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import javax.annotation.concurrent.NotThreadSafe;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
@@ -66,6 +68,7 @@ import java.util.Map;
 * Users should ensure there are no duplicates when the "insert" operation is used and the respective config is enabled. So, the above scenario should not
 * happen, and every batch should contain only new records to be inserted. The above example is for illustration purposes only.
*/
@NotThreadSafe
public class HoodieConcatHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieMergeHandle<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieConcatHandle.class);

View File

@@ -42,12 +42,15 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import javax.annotation.concurrent.NotThreadSafe;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@NotThreadSafe
public class HoodieCreateHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieWriteHandle<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieCreateHandle.class);

View File

@@ -54,6 +54,8 @@ import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import javax.annotation.concurrent.NotThreadSafe;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
@@ -91,6 +93,7 @@ import java.util.Set;
*
* </p>
*/
@NotThreadSafe
public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieWriteHandle<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieMergeHandle.class);

View File

@@ -32,6 +32,8 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.avro.generic.GenericRecord;
import javax.annotation.concurrent.NotThreadSafe;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
@@ -45,6 +47,7 @@ import java.util.Queue;
* The implementation performs a merge-sort by comparing the key of the record being written to the list of
* keys in newRecordKeys (sorted in-memory).
*/
@NotThreadSafe
public class HoodieSortedMergeHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieMergeHandle<T, I, K, O> {
private final Queue<String> newRecordKeysSorted = new PriorityQueue<>();

View File

@@ -28,11 +28,14 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import javax.annotation.concurrent.NotThreadSafe;
/**
* A HoodieCreateHandle which writes all data into a single file.
* <p>
* Please use this with caution. This can end up creating very large files if not used correctly.
*/
@NotThreadSafe
public class HoodieUnboundedCreateHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieCreateHandle<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieUnboundedCreateHandle.class);

View File

@@ -30,13 +30,18 @@ import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import javax.annotation.concurrent.NotThreadSafe;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
/**
* HoodieParquetWriter extends the ParquetWriter to help limit the size of underlying file. Provides a way to check if
* the current file can take more records with the <code>canWrite()</code>
*
 * ATTENTION: HoodieParquetWriter is not thread-safe; developers should take care of the ordering of write and close calls
*/
@NotThreadSafe
public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends IndexedRecord>
extends ParquetWriter<IndexedRecord> implements HoodieFileWriter<R> {
@@ -106,4 +111,9 @@ public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends Indexe
writeSupport.add(key);
}
}
@Override
public void close() throws IOException {
super.close();
}
}

View File

@@ -148,13 +148,16 @@ public class HoodieMergeHelper<T extends HoodieRecordPayload> extends
} catch (Exception e) {
throw new HoodieException(e);
} finally {
      // HUDI-2875: mergeHandle is not thread-safe, so we must fully stop record input
      // and shut down the executor first, and only then close mergeHandle.
if (reader != null) {
reader.close();
}
mergeHandle.close();
if (null != wrapper) {
wrapper.shutdownNow();
wrapper.awaitTermination();
}
mergeHandle.close();
}
}
}