1
0

Perform consistency checks during write finalize

- Check to ensure written files are listable on storage
 - Docs reflected to capture how this helps with s3 storage
 - Unit tests added, corrections to existing tests
 - Fix DeltaStreamer to manage archived commits in a separate folder
This commit is contained in:
vinothchandar
2018-09-20 17:50:27 +05:30
committed by vinoth chandar
parent 4c74dd4cad
commit 9ca6f91e97
17 changed files with 381 additions and 93 deletions

View File

@@ -19,12 +19,12 @@
package com.uber.hoodie
import java.util
import java.util.Optional
import java.util.concurrent.ConcurrentHashMap
import java.util.{Optional, Properties}
import com.uber.hoodie.DataSourceReadOptions._
import com.uber.hoodie.DataSourceWriteOptions._
import com.uber.hoodie.common.table.{HoodieTableConfig, HoodieTableMetaClient}
import com.uber.hoodie.common.table.HoodieTableMetaClient
import com.uber.hoodie.common.util.{FSUtils, TypedProperties}
import com.uber.hoodie.config.HoodieWriteConfig
import com.uber.hoodie.exception.HoodieException
@@ -205,11 +205,8 @@ class DefaultSource extends RelationProvider
// Create the dataset if not present (APPEND mode)
if (!exists) {
val properties = new Properties();
properties.put(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, tblName.get);
properties.put(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, storageType);
properties.put(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, "archived");
HoodieTableMetaClient.initializePathAsHoodieDataset(sparkContext.hadoopConfiguration, path.get, properties);
HoodieTableMetaClient.initTableType(sparkContext.hadoopConfiguration, path.get, storageType,
tblName.get, "archived")
}
// Create a HoodieWriteClient & issue the write.