1
0

Fixing small file handling, inline compaction defaults

- Small file limit is now 100MB by default
- Turned on inline compaction by default for MOR
- Changes take effect on DataSource and DeltaStreamer
This commit is contained in:
Vinoth Chandar
2019-03-12 15:59:41 -07:00
committed by n3nash
parent 51f4908989
commit b34a204a52
5 changed files with 22 additions and 10 deletions

View File

@@ -29,9 +29,9 @@ import org.junit.{Before, Test}
import org.scalatest.junit.AssertionsForJUnit
import scala.collection.JavaConversions._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}
import scala.concurrent.ExecutionContext.Implicits.global
/**
* Basic tests on the spark datasource
@@ -131,6 +131,7 @@ class DataSourceTest extends AssertionsForJUnit {
val inputDF1: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records1, 2))
inputDF1.write.format("com.uber.hoodie")
.options(commonOpts)
.option("hoodie.compact.inline", "false") // else fails due to compaction & deltacommit instant times being same
.option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
.option(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY, DataSourceWriteOptions.MOR_STORAGE_TYPE_OPT_VAL)
.mode(SaveMode.Overwrite)