1
0

[HUDI-2875] Make HoodieParquetWriter thread-safe and make the memory executor exit gracefully (#4264)

This commit is contained in:
guanziyue
2022-05-06 04:49:34 +08:00
committed by GitHub
parent d794f4fbf9
commit abb4893b25
17 changed files with 121 additions and 12 deletions

View File

@@ -95,6 +95,7 @@ public class SparkLazyInsertIterable<T extends HoodieRecordPayload> extends Hood
} finally {
if (null != bufferedIteratorExecutor) {
bufferedIteratorExecutor.shutdownNow();
bufferedIteratorExecutor.awaitTermination();
}
}
}

View File

@@ -80,10 +80,11 @@ class OrcBootstrapMetadataHandler extends BaseBootstrapMetadataHandler {
} catch (Exception e) {
throw new HoodieException(e);
} finally {
bootstrapHandle.close();
if (null != wrapper) {
wrapper.shutdownNow();
wrapper.awaitTermination();
}
bootstrapHandle.close();
}
}
}

View File

@@ -68,9 +68,9 @@ class ParquetBootstrapMetadataHandler extends BaseBootstrapMetadataHandler {
void executeBootstrap(HoodieBootstrapHandle<?, ?, ?, ?> bootstrapHandle,
Path sourceFilePath, KeyGeneratorInterface keyGenerator, String partitionPath, Schema avroSchema) throws Exception {
BoundedInMemoryExecutor<GenericRecord, HoodieRecord, Void> wrapper = null;
ParquetReader<IndexedRecord> reader =
AvroParquetReader.<IndexedRecord>builder(sourceFilePath).withConf(table.getHadoopConf()).build();
try {
ParquetReader<IndexedRecord> reader =
AvroParquetReader.<IndexedRecord>builder(sourceFilePath).withConf(table.getHadoopConf()).build();
wrapper = new BoundedInMemoryExecutor<GenericRecord, HoodieRecord, Void>(config.getWriteBufferLimitBytes(),
new ParquetReaderIterator(reader), new BootstrapRecordConsumer(bootstrapHandle), inp -> {
String recKey = keyGenerator.getKey(inp).getRecordKey();
@@ -84,10 +84,12 @@ class ParquetBootstrapMetadataHandler extends BaseBootstrapMetadataHandler {
} catch (Exception e) {
throw new HoodieException(e);
} finally {
bootstrapHandle.close();
reader.close();
if (null != wrapper) {
wrapper.shutdownNow();
wrapper.awaitTermination();
}
bootstrapHandle.close();
}
}
}