Fix small-file handling and inline compaction defaults
- Small file limit is now 100MB by default
- Inline compaction is now turned on by default for MOR (merge-on-read) storage
- Changes take effect on DataSource and DeltaStreamer
This commit is contained in:
@@ -126,6 +126,12 @@ public class DataSourceUtils {
|
||||
|
||||
public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr,
|
||||
String basePath, String tblName, Map<String, String> parameters) throws Exception {
|
||||
|
||||
// inline compaction is on by default for MOR
|
||||
boolean inlineCompact = parameters.containsKey(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY())
|
||||
&& parameters.get(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY()).equals(DataSourceWriteOptions
|
||||
.MOR_STORAGE_TYPE_OPT_VAL());
|
||||
|
||||
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().combineInput(true, true)
|
||||
.withPath(basePath).withAutoCommit(false)
|
||||
.withSchema(schemaStr).forTable(tblName).withIndexConfig(
|
||||
@@ -134,6 +140,7 @@ public class DataSourceUtils {
|
||||
.withPayloadClass(parameters.get(
|
||||
DataSourceWriteOptions
|
||||
.PAYLOAD_CLASS_OPT_KEY()))
|
||||
.withInlineCompaction(inlineCompact)
|
||||
.build())
|
||||
// override above with Hoodie configs specified as options.
|
||||
.withProps(parameters).build();
|
||||
|
||||
@@ -29,9 +29,9 @@ import org.junit.{Before, Test}
|
||||
import org.scalatest.junit.AssertionsForJUnit
|
||||
|
||||
import scala.collection.JavaConversions._
|
||||
import scala.concurrent.ExecutionContext.Implicits.global
|
||||
import scala.concurrent.duration.Duration
|
||||
import scala.concurrent.{Await, Future}
|
||||
import scala.concurrent.ExecutionContext.Implicits.global
|
||||
|
||||
/**
|
||||
* Basic tests on the spark datasource
|
||||
@@ -131,6 +131,7 @@ class DataSourceTest extends AssertionsForJUnit {
|
||||
val inputDF1: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records1, 2))
|
||||
inputDF1.write.format("com.uber.hoodie")
|
||||
.options(commonOpts)
|
||||
.option("hoodie.compact.inline", "false") // else fails due to compaction & deltacommit instant times being same
|
||||
.option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
|
||||
.option(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY, DataSourceWriteOptions.MOR_STORAGE_TYPE_OPT_VAL)
|
||||
.mode(SaveMode.Overwrite)
|
||||
|
||||
Reference in New Issue
Block a user