[HUDI 1308] Harden RFC-15 Implementation based on production testing (#2441)
Addresses leaks, perf degradation observed during testing. These were regressions from the original rfc-15 PoC implementation. * Pass a single instance of HoodieTableMetadata everywhere * Fix tests and add config for enabling metrics - Removed special casing of assumeDatePartitioning inside FSUtils#getAllPartitionPaths() - Consequently, IOException is never thrown and many files had to be adjusted - More diligent handling of open file handles in metadata table - Added config for controlling reuse of connections - Added config for turning off fallback to listing, so we can see tests fail - Changed all ipf listing code to cache/amortize the open/close for better performance - Timelineserver also reuses connections, for better performance - Without timelineserver, when metadata table is opened from executors, reuse is not allowed - HoodieMetadataConfig passed into HoodieTableMetadata#create as argument. - Fix TestHoodieBackedTableMetadata#testSync
This commit is contained in:
@@ -373,7 +373,7 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
reloadInputFormats();
|
||||
List<GenericRecord> records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
|
||||
jsc.hadoopConfiguration(),
|
||||
FSUtils.getAllPartitionPaths(context, metaClient.getFs(), basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
HoodieMetadataConfig.DEFAULT_METADATA_VALIDATE, false).stream()
|
||||
.map(f -> basePath + "/" + f).collect(Collectors.toList()),
|
||||
basePath, roJobConf, false, schema, TRIP_HIVE_COLUMN_TYPES, false, new ArrayList<>());
|
||||
@@ -392,7 +392,7 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
seenKeys = new HashSet<>();
|
||||
records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
|
||||
jsc.hadoopConfiguration(),
|
||||
FSUtils.getAllPartitionPaths(context, metaClient.getFs(), basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
HoodieMetadataConfig.DEFAULT_METADATA_VALIDATE, false).stream()
|
||||
.map(f -> basePath + "/" + f).collect(Collectors.toList()),
|
||||
basePath, rtJobConf, true, schema, TRIP_HIVE_COLUMN_TYPES, false, new ArrayList<>());
|
||||
@@ -409,7 +409,7 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
reloadInputFormats();
|
||||
records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
|
||||
jsc.hadoopConfiguration(),
|
||||
FSUtils.getAllPartitionPaths(context, metaClient.getFs(), basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
HoodieMetadataConfig.DEFAULT_METADATA_VALIDATE, false).stream()
|
||||
.map(f -> basePath + "/" + f).collect(Collectors.toList()),
|
||||
basePath, roJobConf, false, schema, TRIP_HIVE_COLUMN_TYPES,
|
||||
@@ -427,7 +427,7 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
seenKeys = new HashSet<>();
|
||||
records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
|
||||
jsc.hadoopConfiguration(),
|
||||
FSUtils.getAllPartitionPaths(context, metaClient.getFs(), basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
HoodieMetadataConfig.DEFAULT_METADATA_VALIDATE, false).stream()
|
||||
.map(f -> basePath + "/" + f).collect(Collectors.toList()),
|
||||
basePath, rtJobConf, true, schema, TRIP_HIVE_COLUMN_TYPES, true,
|
||||
@@ -443,7 +443,7 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
reloadInputFormats();
|
||||
records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
|
||||
jsc.hadoopConfiguration(),
|
||||
FSUtils.getAllPartitionPaths(context, metaClient.getFs(), basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
HoodieMetadataConfig.DEFAULT_METADATA_VALIDATE, false).stream()
|
||||
.map(f -> basePath + "/" + f).collect(Collectors.toList()),
|
||||
basePath, roJobConf, false, schema, TRIP_HIVE_COLUMN_TYPES, true,
|
||||
@@ -461,7 +461,7 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
seenKeys = new HashSet<>();
|
||||
records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
|
||||
jsc.hadoopConfiguration(),
|
||||
FSUtils.getAllPartitionPaths(context, metaClient.getFs(), basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
|
||||
HoodieMetadataConfig.DEFAULT_METADATA_VALIDATE, false).stream()
|
||||
.map(f -> basePath + "/" + f).collect(Collectors.toList()),
|
||||
basePath, rtJobConf, true, schema, TRIP_HIVE_COLUMN_TYPES, true,
|
||||
|
||||
Reference in New Issue
Block a user