1
0

Fixing small file handling, inline compaction defaults

- Small file limit is now 100MB by default
- Inline compaction is now turned on by default for MOR
- Changes take effect in both DataSource and DeltaStreamer
This commit is contained in:
Vinoth Chandar
2019-03-12 15:59:41 -07:00
committed by n3nash
parent 51f4908989
commit b34a204a52
5 changed files with 22 additions and 10 deletions

View File

@@ -126,6 +126,12 @@ public class DataSourceUtils {
public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr,
String basePath, String tblName, Map<String, String> parameters) throws Exception {
// inline compaction is on by default for MOR
boolean inlineCompact = parameters.containsKey(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY())
&& parameters.get(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY()).equals(DataSourceWriteOptions
.MOR_STORAGE_TYPE_OPT_VAL());
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().combineInput(true, true)
.withPath(basePath).withAutoCommit(false)
.withSchema(schemaStr).forTable(tblName).withIndexConfig(
@@ -134,6 +140,7 @@ public class DataSourceUtils {
.withPayloadClass(parameters.get(
DataSourceWriteOptions
.PAYLOAD_CLASS_OPT_KEY()))
.withInlineCompaction(inlineCompact)
.build())
// override above with Hoodie configs specified as options.
.withProps(parameters).build();